Spaces:
Sleeping
Sleeping
Niki Zhang
committed on
Update app.py
Browse files
Fixed the issue of overlapping captions
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import os
|
| 2 |
import base64
|
| 3 |
import json
|
|
@@ -26,7 +27,7 @@ import tts
|
|
| 26 |
###############################################################################
|
| 27 |
|
| 28 |
|
| 29 |
-
|
| 30 |
|
| 31 |
import os
|
| 32 |
import imageio
|
|
@@ -280,7 +281,7 @@ def make3d(images):
|
|
| 280 |
|
| 281 |
|
| 282 |
gpt_state = 0
|
| 283 |
-
|
| 284 |
article = """
|
| 285 |
<div style='margin:20px auto;'>
|
| 286 |
<p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
|
|
@@ -532,7 +533,8 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
|
|
| 532 |
print(generated_caption)
|
| 533 |
print("new crop save",new_crop_save_path)
|
| 534 |
|
| 535 |
-
yield state, state, click_state, image_input_nobackground, image_input_withbackground, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path
|
|
|
|
| 536 |
|
| 537 |
|
| 538 |
|
|
@@ -541,11 +543,27 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
|
|
| 541 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 542 |
input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path):
|
| 543 |
print("state",state)
|
| 544 |
-
|
|
|
|
| 545 |
click_index = click_index_state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
input_mask = input_mask_state
|
| 547 |
input_points = input_points_state
|
| 548 |
input_labels = input_labels_state
|
|
|
|
|
|
|
|
|
|
| 549 |
focus_map = {
|
| 550 |
"CFV-D":0,
|
| 551 |
"CFV-DA":1,
|
|
@@ -604,10 +622,13 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
|
|
| 604 |
if not args.disable_gpt and text_refiner:
|
| 605 |
print("new crop save",new_crop_save_path)
|
| 606 |
focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)
|
|
|
|
|
|
|
| 607 |
|
| 608 |
# state = state + [(None, f"Wiki: {paragraph}")]
|
| 609 |
state = state + [(None, f"Focus_Caption: {focus_info}")]
|
| 610 |
print("new_cap",focus_info)
|
|
|
|
| 611 |
refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
|
| 612 |
input_points=input_points, input_labels=input_labels)
|
| 613 |
try:
|
|
@@ -774,7 +795,7 @@ def export_chat_log(chat_state):
|
|
| 774 |
return None
|
| 775 |
chat_log = "\n".join(f"{entry[0]}\n{entry[1]}" for entry in chat_state if entry)
|
| 776 |
print("export log...")
|
| 777 |
-
print("chat_log",chat_log)
|
| 778 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
|
| 779 |
temp_file.write(chat_log.encode('utf-8'))
|
| 780 |
temp_file_path = temp_file.name
|
|
@@ -881,6 +902,7 @@ def create_ui():
|
|
| 881 |
input_points_state = gr.State([])
|
| 882 |
input_labels_state = gr.State([])
|
| 883 |
new_crop_save_path = gr.State(None)
|
|
|
|
| 884 |
|
| 885 |
|
| 886 |
|
|
@@ -1028,6 +1050,7 @@ def create_ui():
|
|
| 1028 |
submit_tts = gr.Button(value="Submit", interactive=True)
|
| 1029 |
clear_tts = gr.Button(value="Clear", interactive=True)
|
| 1030 |
|
|
|
|
| 1031 |
|
| 1032 |
###############################################################################
|
| 1033 |
# this part is for 3d generate.
|
|
@@ -1276,9 +1299,9 @@ def create_ui():
|
|
| 1276 |
inputs=[
|
| 1277 |
origin_image, point_prompt, click_mode, enable_wiki, language, sentiment, factuality, length,
|
| 1278 |
image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
|
| 1279 |
-
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state
|
| 1280 |
],
|
| 1281 |
-
outputs=[chatbot, state, click_state, image_input, input_image, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path],
|
| 1282 |
show_progress=False, queue=True
|
| 1283 |
)
|
| 1284 |
|
|
@@ -1297,6 +1320,15 @@ def create_ui():
|
|
| 1297 |
show_progress=True,
|
| 1298 |
queue=True
|
| 1299 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1300 |
|
| 1301 |
|
| 1302 |
|
|
|
|
| 1 |
+
from math import inf
|
| 2 |
import os
|
| 3 |
import base64
|
| 4 |
import json
|
|
|
|
| 27 |
###############################################################################
|
| 28 |
|
| 29 |
|
| 30 |
+
import spaces
|
| 31 |
|
| 32 |
import os
|
| 33 |
import imageio
|
|
|
|
| 281 |
|
| 282 |
|
| 283 |
gpt_state = 0
|
| 284 |
+
pre_click_index=(inf, inf)
|
| 285 |
article = """
|
| 286 |
<div style='margin:20px auto;'>
|
| 287 |
<p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
|
|
|
|
| 533 |
print(generated_caption)
|
| 534 |
print("new crop save",new_crop_save_path)
|
| 535 |
|
| 536 |
+
yield state, state, click_state, image_input_nobackground, image_input_withbackground, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
|
| 537 |
+
|
| 538 |
|
| 539 |
|
| 540 |
|
|
|
|
| 543 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 544 |
input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path):
|
| 545 |
print("state",state)
|
| 546 |
+
|
| 547 |
+
global pre_click_index
|
| 548 |
click_index = click_index_state
|
| 549 |
+
|
| 550 |
+
# if pre_click_index==click_index:
|
| 551 |
+
# click_index = (click_index[0] - 1, click_index[1] - 1)
|
| 552 |
+
# pre_click_index = click_index
|
| 553 |
+
# else:
|
| 554 |
+
# pre_click_index = click_index
|
| 555 |
+
print("click_index",click_index)
|
| 556 |
+
print("pre_click_index",pre_click_index)
|
| 557 |
+
print("input_points_state",input_points_state)
|
| 558 |
+
print("input_labels_state",input_labels_state)
|
| 559 |
+
|
| 560 |
+
|
| 561 |
input_mask = input_mask_state
|
| 562 |
input_points = input_points_state
|
| 563 |
input_labels = input_labels_state
|
| 564 |
+
|
| 565 |
+
|
| 566 |
+
|
| 567 |
focus_map = {
|
| 568 |
"CFV-D":0,
|
| 569 |
"CFV-DA":1,
|
|
|
|
| 622 |
if not args.disable_gpt and text_refiner:
|
| 623 |
print("new crop save",new_crop_save_path)
|
| 624 |
focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)
|
| 625 |
+
if focus_info.startswith('"') and focus_info.endswith('"'):
|
| 626 |
+
focus_info=focus_info[1:-1]
|
| 627 |
|
| 628 |
# state = state + [(None, f"Wiki: {paragraph}")]
|
| 629 |
state = state + [(None, f"Focus_Caption: {focus_info}")]
|
| 630 |
print("new_cap",focus_info)
|
| 631 |
+
|
| 632 |
refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
|
| 633 |
input_points=input_points, input_labels=input_labels)
|
| 634 |
try:
|
|
|
|
| 795 |
return None
|
| 796 |
chat_log = "\n".join(f"{entry[0]}\n{entry[1]}" for entry in chat_state if entry)
|
| 797 |
print("export log...")
|
| 798 |
+
print("chat_log", chat_log)
|
| 799 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
|
| 800 |
temp_file.write(chat_log.encode('utf-8'))
|
| 801 |
temp_file_path = temp_file.name
|
|
|
|
| 902 |
input_points_state = gr.State([])
|
| 903 |
input_labels_state = gr.State([])
|
| 904 |
new_crop_save_path = gr.State(None)
|
| 905 |
+
image_input_nobackground = gr.State(None)
|
| 906 |
|
| 907 |
|
| 908 |
|
|
|
|
| 1050 |
submit_tts = gr.Button(value="Submit", interactive=True)
|
| 1051 |
clear_tts = gr.Button(value="Clear", interactive=True)
|
| 1052 |
|
| 1053 |
+
|
| 1054 |
|
| 1055 |
###############################################################################
|
| 1056 |
# this part is for 3d generate.
|
|
|
|
| 1299 |
inputs=[
|
| 1300 |
origin_image, point_prompt, click_mode, enable_wiki, language, sentiment, factuality, length,
|
| 1301 |
image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
|
| 1302 |
+
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
| 1303 |
],
|
| 1304 |
+
outputs=[chatbot, state, click_state, image_input, input_image, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground],
|
| 1305 |
show_progress=False, queue=True
|
| 1306 |
)
|
| 1307 |
|
|
|
|
| 1320 |
show_progress=True,
|
| 1321 |
queue=True
|
| 1322 |
)
|
| 1323 |
+
|
| 1324 |
+
|
| 1325 |
+
focus_type.change(
|
| 1326 |
+
lambda x: ([[], [], []], x),
|
| 1327 |
+
[image_input_nobackground],
|
| 1328 |
+
[click_state, image_input],
|
| 1329 |
+
queue=False,
|
| 1330 |
+
show_progress=False
|
| 1331 |
+
)
|
| 1332 |
|
| 1333 |
|
| 1334 |
|