Spaces: Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -567,8 +567,8 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
|
|
| 567 |
focus_map = {
|
| 568 |
"CFV-D":0,
|
| 569 |
"CFV-DA":1,
|
| 570 |
-
"
|
| 571 |
-
"PFV-
|
| 572 |
}
|
| 573 |
|
| 574 |
mapped_value = focus_map.get(focus_type, -1)
|
|
@@ -632,18 +632,18 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
|
|
| 632 |
focus_info=focus_info[1:-1]
|
| 633 |
|
| 634 |
# state = state + [(None, f"Wiki: {paragraph}")]
|
| 635 |
-
state = state + [(None, f"
|
| 636 |
print("new_cap",focus_info)
|
| 637 |
|
| 638 |
refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
|
| 639 |
input_points=input_points, input_labels=input_labels)
|
| 640 |
try:
|
| 641 |
-
waveform_visual, audio_output = tts.predict(focus_info, input_language, input_audio, input_mic, use_mic, agree)
|
| 642 |
return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
|
| 643 |
except Exception as e:
|
| 644 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
| 645 |
print(f"Error during TTS prediction: {str(e)}")
|
| 646 |
-
return state, state,
|
| 647 |
|
| 648 |
else:
|
| 649 |
try:
|
|
@@ -652,7 +652,7 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
|
|
| 652 |
except Exception as e:
|
| 653 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
| 654 |
print(f"Error during TTS prediction: {str(e)}")
|
| 655 |
-
return state, state,
|
| 656 |
|
| 657 |
|
| 658 |
def encode_image(image_path):
|
|
@@ -941,7 +941,7 @@ def create_ui():
|
|
| 941 |
material_label = gr.Button(value="Material: ")
|
| 942 |
with gr.Row(scale=1.0):
|
| 943 |
focus_type = gr.Radio(
|
| 944 |
-
choices=["CFV-D", "CFV-DA", "
|
| 945 |
value="CFV-D",
|
| 946 |
label="Focus Type",
|
| 947 |
interactive=True)
|
|
@@ -1320,7 +1320,7 @@ def create_ui():
|
|
| 1320 |
input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path
|
| 1321 |
],
|
| 1322 |
outputs=[
|
| 1323 |
-
chatbot, state,
|
| 1324 |
output_waveform, output_audio
|
| 1325 |
],
|
| 1326 |
show_progress=True,
|
|
|
|
| 567 |
focus_map = {
|
| 568 |
"CFV-D":0,
|
| 569 |
"CFV-DA":1,
|
| 570 |
+
"CFV-DAI":2,
|
| 571 |
+
"PFV-DDA":3
|
| 572 |
}
|
| 573 |
|
| 574 |
mapped_value = focus_map.get(focus_type, -1)
|
|
|
|
| 632 |
focus_info=focus_info[1:-1]
|
| 633 |
|
| 634 |
# state = state + [(None, f"Wiki: {paragraph}")]
|
| 635 |
+
state = state + [(None, f"{focus_info}")]
|
| 636 |
print("new_cap",focus_info)
|
| 637 |
|
| 638 |
refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
|
| 639 |
input_points=input_points, input_labels=input_labels)
|
| 640 |
try:
|
| 641 |
+
waveform_visual, audio_output = tts.predict(focus_info.replace('#', ''), input_language, input_audio, input_mic, use_mic, agree)
|
| 642 |
return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
|
| 643 |
except Exception as e:
|
| 644 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
| 645 |
print(f"Error during TTS prediction: {str(e)}")
|
| 646 |
+
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
|
| 647 |
|
| 648 |
else:
|
| 649 |
try:
|
|
|
|
| 652 |
except Exception as e:
|
| 653 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
| 654 |
print(f"Error during TTS prediction: {str(e)}")
|
| 655 |
+
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
|
| 656 |
|
| 657 |
|
| 658 |
def encode_image(image_path):
|
|
|
|
| 941 |
material_label = gr.Button(value="Material: ")
|
| 942 |
with gr.Row(scale=1.0):
|
| 943 |
focus_type = gr.Radio(
|
| 944 |
+
choices=["CFV-D", "CFV-DA", "CFV-DAI","PFV-DDA"],
|
| 945 |
value="CFV-D",
|
| 946 |
label="Focus Type",
|
| 947 |
interactive=True)
|
|
|
|
| 1320 |
input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path
|
| 1321 |
],
|
| 1322 |
outputs=[
|
| 1323 |
+
chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,
|
| 1324 |
output_waveform, output_audio
|
| 1325 |
],
|
| 1326 |
show_progress=True,
|