Spaces:
Build error
Build error
Update app_dialogue.py
Browse files- app_dialogue.py +85 -131
app_dialogue.py
CHANGED
|
@@ -70,13 +70,20 @@ import tempfile
|
|
| 70 |
|
| 71 |
|
| 72 |
def convert_to_rgb_pil(image):
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
# Save the converted image to a temporary file
|
| 78 |
-
#temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
|
| 79 |
-
#temp_file_path = temp_file.name
|
| 80 |
filename = f"{uuid.uuid4()}.jpg"
|
| 81 |
local_path = f"{filename}"
|
| 82 |
|
|
@@ -89,16 +96,30 @@ def convert_to_rgb_pil(image):
|
|
| 89 |
else:
|
| 90 |
image.save(local_path)
|
| 91 |
|
| 92 |
-
#temp_file.close()
|
| 93 |
-
print(f"# Return the path to the saved image as - {local_path}")
|
| 94 |
return local_path # Return the path to the saved image
|
| 95 |
|
| 96 |
|
| 97 |
def convert_to_rgb(filepath_or_pilimg):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
# `image.convert("RGB")` would only work for .jpg images, as it creates a wrong background
|
| 99 |
# for transparent images. The call to `alpha_composite` handles this case
|
| 100 |
-
print(f"***** convert_to_rgb ******")
|
| 101 |
-
print(f"params: image is - {filepath_or_pilimg}")
|
| 102 |
|
| 103 |
if isinstance(filepath_or_pilimg, PIL.Image.Image):
|
| 104 |
return convert_to_rgb_pil(filepath_or_pilimg)
|
|
@@ -123,26 +144,15 @@ def convert_to_rgb(filepath_or_pilimg):
|
|
| 123 |
# Save the converted image to a temporary file
|
| 124 |
filename = f"{uuid.uuid4()}.jpg"
|
| 125 |
local_path = f"{filename}"
|
| 126 |
-
#temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
|
| 127 |
-
#temp_file_path = temp_file.name
|
| 128 |
alpha_composite.save(local_path)
|
| 129 |
-
#temp_file.close()
|
| 130 |
|
| 131 |
-
print(f"# Return the path to the saved image as - {local_path}")
|
| 132 |
return local_path # Return the path to the saved image
|
| 133 |
|
|
|
|
| 134 |
def pil_to_markdown_im(image):
|
| 135 |
"""
|
| 136 |
Convert a PIL image into markdown filled with the base64 string representation.
|
| 137 |
"""
|
| 138 |
-
print(f"***** pil_to_markdown_im ******")
|
| 139 |
-
print(f"params: image is - {image}")
|
| 140 |
-
#if isinstance(image, PIL.Image.Image):
|
| 141 |
-
#img_b64_str = pil_to_base64(image)
|
| 142 |
-
#img_str = f'<img src="data:image/png;base64,{img_b64_str}" />'
|
| 143 |
-
#if path_or_url.startswith(("http://", "https://")):
|
| 144 |
-
#response = requests.get(image)
|
| 145 |
-
#image = Image.open(BytesIO(response.content))
|
| 146 |
# Generate a unique filename using UUID
|
| 147 |
filename = f"{uuid.uuid4()}.jpg"
|
| 148 |
local_path = f"{filename}"
|
|
@@ -156,7 +166,7 @@ def base64_to_pil(encoded_image):
|
|
| 156 |
pil_image = Image.open(BytesIO(decoded_image))
|
| 157 |
return pil_image
|
| 158 |
|
| 159 |
-
|
| 160 |
def im_markdown_to_pil(im_markdown_str):
|
| 161 |
pattern = r'<img src="data:image/png;base64,([^"]+)" />'
|
| 162 |
match = re.search(pattern, im_markdown_str)
|
|
@@ -218,33 +228,20 @@ def isolate_images_urls(prompt_list):
|
|
| 218 |
]
|
| 219 |
```
|
| 220 |
"""
|
| 221 |
-
print(f"******* isolate_images_urls *******")
|
| 222 |
-
print(f"params: prompt_list is - {prompt_list}")
|
| 223 |
|
| 224 |
linearized_list = []
|
| 225 |
for prompt in prompt_list:
|
| 226 |
-
print(f"inside FOR loop: prompt in prompt_list is - {prompt}")
|
| 227 |
# Prompt can be either a string, or a PIL image
|
| 228 |
if isinstance(prompt, PIL.Image.Image):
|
| 229 |
-
print(f"inside first IF in FOR loop: prompt is of type PIL.Image.Image")
|
| 230 |
linearized_list.append(prompt)
|
| 231 |
-
print(f"linearized_list after append is - {linearized_list}")
|
| 232 |
elif isinstance(prompt, str) and "/tmp/gradio/" in prompt: #isinstance(prompt, PIL.Image.Image):
|
| 233 |
-
print(f"inside IF in FOR loop: prompt is a string and is a path for temporary file")
|
| 234 |
linearized_list.append(prompt)
|
| 235 |
-
print(f"linearized_list after append is - {linearized_list}")
|
| 236 |
elif isinstance(prompt, str) and "/tmp/gradio/" not in prompt:
|
| 237 |
-
print(f"inside ELIF in FOR loop: prompt is a string and is NOT a path for temporary file")
|
| 238 |
if "<fake_token_around_image>" not in prompt:
|
| 239 |
-
print(f"inside IF inside ELIF in FOR loop: '<fake_token_around_image>' is NOT in prompt")
|
| 240 |
linearized_list.append(prompt)
|
| 241 |
-
print(f"linearized_list after append is - {linearized_list}")
|
| 242 |
else:
|
| 243 |
-
print(f"inside ELSE inside ELIF in FOR loop: '<fake_token_around_image>' IS IN prompt")
|
| 244 |
prompt_splitted = prompt.split("<fake_token_around_image>")
|
| 245 |
-
print(f"prompt_splitted is - {prompt_splitted}")
|
| 246 |
for ps in prompt_splitted:
|
| 247 |
-
print(f"Inside FOR loop inside FOR loop: ps in prompt_split is {ps}")
|
| 248 |
if ps == "":
|
| 249 |
continue
|
| 250 |
if ps.startswith("<image:"):
|
|
@@ -256,7 +253,6 @@ def isolate_images_urls(prompt_list):
|
|
| 256 |
f"Unrecognized type for `prompt`. Got {type(type(prompt))}. Was expecting something in [`str`,"
|
| 257 |
" `PIL.Image.Image`]"
|
| 258 |
)
|
| 259 |
-
print(f"linearized_list to be returned is - {linearized_list}")
|
| 260 |
return linearized_list
|
| 261 |
|
| 262 |
|
|
@@ -285,31 +281,20 @@ def user_prompt_list_to_markdown(user_prompt_list: List[Union[str, PIL.Image.Ima
|
|
| 285 |
Convert a user prompt in the list format (i.e. elements are either a PIL image or a string) into
|
| 286 |
the markdown format that is used for the chatbot history and rendering.
|
| 287 |
"""
|
| 288 |
-
print("********** user_prompt_list_to_markdown *********")
|
| 289 |
-
print(f" param : user_prompt_list is - {user_prompt_list}")
|
| 290 |
resulting_string = ""
|
| 291 |
for elem in user_prompt_list:
|
| 292 |
-
print(f"inside user_prompt_list_to_markdown, for loop on user_prompt_list")
|
| 293 |
-
print(f"elem is - {elem} ")
|
| 294 |
if isinstance(elem, str):
|
| 295 |
if "/tmp/gradio/" not in elem:
|
| 296 |
resulting_string += elem
|
| 297 |
-
print(f"inside IF - when elem is string and is not temp image filepath. resulting_string is - {resulting_string}")
|
| 298 |
elif "/tmp/gradio/" in elem:
|
| 299 |
resulting_string += f"})"
|
| 300 |
-
print(f"inside IF - when elem is string and is a temp image filepath. resulting_string is - {resulting_string}")
|
| 301 |
-
#elif isinstance(elem, str) and "/tmp/gradio/" in elem:
|
| 302 |
-
# resulting_string += f"})" #f""
|
| 303 |
-
# print(f"inside first ELIF - when elem is string and is the temp image filepath. resulting_string is - {resulting_string}")
|
| 304 |
elif isinstance(elem, PIL.Image.Image): #or "/tmp/gradio/" in elem: #and "/tmp/gradio/" in elem:
|
| 305 |
resulting_string += f"})" #pil_to_markdown_im(convert_to_rgb(elem)) <---------------
|
| 306 |
-
print(f"inside the ELIF - when elem is an instance of PIL.Image.Image. The resulting_string after convert_to_rgb() operation is - {resulting_string}")
|
| 307 |
else:
|
| 308 |
raise ValueError(
|
| 309 |
"Unknown type for `user_prompt_list`. Expected an element of type `str` or `PIL.Image.Image` and got"
|
| 310 |
f" `{type(elem)}`"
|
| 311 |
)
|
| 312 |
-
print(f" final resulting_string that will be returned is - {resulting_string}")
|
| 313 |
return resulting_string
|
| 314 |
|
| 315 |
|
|
@@ -348,8 +333,6 @@ def load_processor_tokenizer_model(model_name):
|
|
| 348 |
max_memory=max_memory_map,
|
| 349 |
)
|
| 350 |
model.eval()
|
| 351 |
-
print("Current device map:", model.hf_device_map)
|
| 352 |
-
print("Model default generation config:", model.generation_config)
|
| 353 |
# TODO: the device_map looks very inefficient right now; that could be improved.
|
| 354 |
return processor, tokenizer, model
|
| 355 |
|
|
@@ -361,60 +344,41 @@ def format_user_prompt_with_im_history_and_system_conditioning(
|
|
| 361 |
Produces the resulting list that needs to go inside the processor.
|
| 362 |
It handles the potential image box input, the history and the system conditioning.
|
| 363 |
"""
|
| 364 |
-
print(f"*********format_user_prompt_with_im_history_and_system_conditioning*********")
|
| 365 |
-
print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_user_prompt_str is - {current_user_prompt_str} ")
|
| 366 |
-
print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_image is - {current_image} ")
|
| 367 |
-
print(f"format_user_prompt_with_im_history_and_system_conditioning -- param history is - {history} ")
|
| 368 |
|
| 369 |
resulting_list = copy.deepcopy(SYSTEM_PROMPT)
|
| 370 |
|
| 371 |
# Format history
|
| 372 |
for turn in history:
|
| 373 |
-
print(f"inside for loop, turn is - {turn}")
|
| 374 |
user_utterance, assistant_utterance = turn
|
| 375 |
-
print("calling split_str_on_im_markdown from inside for loop inside format_user_prompt_with_im_history_and_system_conditioning")
|
| 376 |
splitted_user_utterance = split_str_on_im_markdown(user_utterance)
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
]
|
| 383 |
-
print(f"splitted_user_utterance after im_markdown_to_pil() is - {splitted_user_utterance} ")
|
| 384 |
|
| 385 |
if isinstance(splitted_user_utterance[0], str):
|
| 386 |
resulting_list.append("\nUser: ")
|
| 387 |
else:
|
| 388 |
resulting_list.append("\nUser:")
|
| 389 |
-
print(f"resulting_list after if..else block is - {resulting_list}")
|
| 390 |
resulting_list.extend(splitted_user_utterance)
|
| 391 |
-
print(f"resulting_list after extend is - {resulting_list}")
|
| 392 |
resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
|
| 393 |
-
print(f"resulting_list after append is - {resulting_list}")
|
| 394 |
|
| 395 |
|
| 396 |
# Format current input
|
| 397 |
current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
|
| 398 |
-
print(f"current_user_prompt_str is - {current_user_prompt_str}")
|
| 399 |
|
| 400 |
if current_image is None:
|
| 401 |
-
print("inside IF : current_image is NONE")
|
| 402 |
if "<img src=data:image/png;base64" in current_user_prompt_str:
|
| 403 |
raise ValueError("The UI does not support inputing via the text box an image in base64.")
|
| 404 |
current_user_prompt_list = handle_manual_images_in_user_prompt(current_user_prompt_str)
|
| 405 |
-
print(f"current_user_prompt_list (or [user_prompt]/resulting_user_prompt((most likely this one)) from handle_manual_images_in_user_prompt ) is - {current_user_prompt_list}")
|
| 406 |
resulting_list.append("\nUser: ")
|
| 407 |
-
print(f"resulting_list with append user - {resulting_list}")
|
| 408 |
resulting_list.extend(current_user_prompt_list)
|
| 409 |
-
print(f"resulting_list after extend with current_user_prompt_list is - {resulting_list}")
|
| 410 |
resulting_list.append("<end_of_utterance>\nAssistant:")
|
| 411 |
-
print(f"resulting_list after append with end_of_utteranceAssistant is - {resulting_list}")
|
| 412 |
return resulting_list, current_user_prompt_list
|
| 413 |
else:
|
| 414 |
-
|
| 415 |
-
# Choosing to put the image first when the image is inputted through the UI, but this is an arbiratrary choice.
|
| 416 |
resulting_list.extend(["\nUser:", Image.open(current_image), f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"]) #current_image
|
| 417 |
-
print(f"final resulting_list passed on to calling function is - {resulting_list}")
|
| 418 |
return resulting_list, [current_user_prompt_str]
|
| 419 |
|
| 420 |
|
|
@@ -836,11 +800,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
|
|
| 836 |
penalty_alpha,
|
| 837 |
):
|
| 838 |
# global processor, model, tokenizer
|
| 839 |
-
print("***********Model_inference*************")
|
| 840 |
-
print(f"Inside Model_inference, user_prompt_str is - {user_prompt_str} ")
|
| 841 |
-
print(f"Inside Model_inference, chat_history is - {chat_history} ")
|
| 842 |
-
print(f"Inside Model_inference, image type is - {type(image)} ")
|
| 843 |
-
print(f"Inside Model_inference, image is - {image} ")
|
| 844 |
|
| 845 |
force_words = ""
|
| 846 |
hide_special_tokens = False
|
|
@@ -851,9 +810,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
|
|
| 851 |
history=chat_history,
|
| 852 |
)
|
| 853 |
|
| 854 |
-
print(f"formated_prompt_list (or resulting_list) is {formated_prompt_list}")
|
| 855 |
-
print(f"user_prompt_list (or [current_user_prompt_str]) is {user_prompt_list}")
|
| 856 |
-
|
| 857 |
generated_text = model_generation(
|
| 858 |
prompt_list=formated_prompt_list,
|
| 859 |
processor=processor,
|
|
@@ -881,7 +837,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
|
|
| 881 |
chat_history.append(
|
| 882 |
(user_prompt_list_to_markdown(user_prompt_list), generated_text.strip("<end_of_utterance>"))
|
| 883 |
)
|
| 884 |
-
print(f"chat_history (IF image is None or is with fake token) is -{chat_history}")
|
| 885 |
else:
|
| 886 |
# Case where the image is passed through the Image Box.
|
| 887 |
# Convert the image into base64 for both passing it through the chat history and
|
|
@@ -892,7 +847,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
|
|
| 892 |
generated_text.strip("<end_of_utterance>"),
|
| 893 |
)
|
| 894 |
)
|
| 895 |
-
print(f"chat_history (ELSE IF image is available) is -{chat_history}")
|
| 896 |
return "", None, chat_history
|
| 897 |
|
| 898 |
|
|
@@ -1045,52 +999,52 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
|
|
| 1045 |
examples=[
|
| 1046 |
["What are the armed baguettes guarding?", f"{examples_path}/example_images/baguettes_guarding_paris.png"],
|
| 1047 |
[
|
| 1048 |
-
|
| 1049 |
-
|
| 1050 |
-
|
| 1051 |
-
["Can you describe the image?", f"{examples_path}/example_images/bear_costume.png"],
|
| 1052 |
-
["What is this animal and why is it unusual?", f"{examples_path}/example_images/blue_dog.png"],
|
| 1053 |
-
[
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
],
|
| 1057 |
-
["What is this sketch for? How would you make an argument to prove this sketch was made by Picasso himself?", f"{examples_path}/example_images/cat_sketch.png"],
|
| 1058 |
-
["Which celebrity does this claymation figure look like?", f"{examples_path}/example_images/kanye.jpg"],
|
| 1059 |
-
[
|
| 1060 |
-
|
| 1061 |
-
|
| 1062 |
-
],
|
| 1063 |
-
[
|
| 1064 |
-
|
| 1065 |
-
|
| 1066 |
-
],
|
| 1067 |
-
["Can you describe this image in details please?", f"{examples_path}/example_images/dragons_playing.png"],
|
| 1068 |
-
["What can you tell me about the cap in this image?", f"{examples_path}/example_images/ironman_cap.png"],
|
| 1069 |
-
[
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
],
|
| 1073 |
-
[
|
| 1074 |
-
|
| 1075 |
-
|
| 1076 |
-
],
|
| 1077 |
-
["What is happening in this image and why is it unusual?", f"{examples_path}/example_images/ramen.png"],
|
| 1078 |
-
[
|
| 1079 |
-
|
| 1080 |
-
|
| 1081 |
-
],
|
| 1082 |
-
["Who is the person in the image and what is he doing?", f"{examples_path}/example_images/tom-cruise-astronaut-pegasus.jpg"],
|
| 1083 |
-
[
|
| 1084 |
-
|
| 1085 |
-
|
| 1086 |
-
],
|
| 1087 |
-
[
|
| 1088 |
-
|
| 1089 |
-
|
| 1090 |
-
|
| 1091 |
-
|
| 1092 |
-
|
| 1093 |
-
],
|
| 1094 |
],
|
| 1095 |
inputs=[textbox, imagebox],
|
| 1096 |
outputs=[textbox, imagebox, chatbot],
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
def convert_to_rgb_pil(image):
|
| 73 |
+
"""
|
| 74 |
+
Convert a PIL Image object to RGB mode and save it locally.
|
| 75 |
+
|
| 76 |
+
The function ensures that images with transparency (alpha channel)
|
| 77 |
+
are overlaid on a white background before saving.
|
| 78 |
+
|
| 79 |
+
Parameters:
|
| 80 |
+
- image (PIL.Image.Image): The input image to be processed.
|
| 81 |
+
|
| 82 |
+
Returns:
|
| 83 |
+
- str: The path to the saved RGB image.
|
| 84 |
+
|
| 85 |
+
"""
|
| 86 |
# Save the converted image to a temporary file
|
|
|
|
|
|
|
| 87 |
filename = f"{uuid.uuid4()}.jpg"
|
| 88 |
local_path = f"{filename}"
|
| 89 |
|
|
|
|
| 96 |
else:
|
| 97 |
image.save(local_path)
|
| 98 |
|
|
|
|
|
|
|
| 99 |
return local_path # Return the path to the saved image
|
| 100 |
|
| 101 |
|
| 102 |
def convert_to_rgb(filepath_or_pilimg):
|
| 103 |
+
"""
|
| 104 |
+
Convert an image to RGB mode, handling transparency for non-RGB images.
|
| 105 |
+
|
| 106 |
+
This function can accept either a file path to an image or a PIL Image object.
|
| 107 |
+
For transparent images, the function overlays the image onto a white background
|
| 108 |
+
to handle the transparency before converting it to RGB mode.
|
| 109 |
+
|
| 110 |
+
Parameters:
|
| 111 |
+
- filepath_or_pilimg (str or PIL.Image.Image): The file path to an image or a PIL
|
| 112 |
+
Image object to be processed.
|
| 113 |
+
|
| 114 |
+
Returns:
|
| 115 |
+
- str: If the input was a file path, the return will be the path to the original
|
| 116 |
+
image (if it's already in RGB) or the path to the saved RGB image.
|
| 117 |
+
If the input was a PIL Image object, the return will be the path to the saved
|
| 118 |
+
RGB image.
|
| 119 |
+
|
| 120 |
+
"""
|
| 121 |
# `image.convert("RGB")` would only work for .jpg images, as it creates a wrong background
|
| 122 |
# for transparent images. The call to `alpha_composite` handles this case
|
|
|
|
|
|
|
| 123 |
|
| 124 |
if isinstance(filepath_or_pilimg, PIL.Image.Image):
|
| 125 |
return convert_to_rgb_pil(filepath_or_pilimg)
|
|
|
|
| 144 |
# Save the converted image to a temporary file
|
| 145 |
filename = f"{uuid.uuid4()}.jpg"
|
| 146 |
local_path = f"{filename}"
|
|
|
|
|
|
|
| 147 |
alpha_composite.save(local_path)
|
|
|
|
| 148 |
|
|
|
|
| 149 |
return local_path # Return the path to the saved image
|
| 150 |
|
| 151 |
+
# XXXX REMOVE
|
| 152 |
def pil_to_markdown_im(image):
|
| 153 |
"""
|
| 154 |
Convert a PIL image into markdown filled with the base64 string representation.
|
| 155 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
# Generate a unique filename using UUID
|
| 157 |
filename = f"{uuid.uuid4()}.jpg"
|
| 158 |
local_path = f"{filename}"
|
|
|
|
| 166 |
pil_image = Image.open(BytesIO(decoded_image))
|
| 167 |
return pil_image
|
| 168 |
|
| 169 |
+
# XXXXX REMOVE
|
| 170 |
def im_markdown_to_pil(im_markdown_str):
|
| 171 |
pattern = r'<img src="data:image/png;base64,([^"]+)" />'
|
| 172 |
match = re.search(pattern, im_markdown_str)
|
|
|
|
| 228 |
]
|
| 229 |
```
|
| 230 |
"""
|
|
|
|
|
|
|
| 231 |
|
| 232 |
linearized_list = []
|
| 233 |
for prompt in prompt_list:
|
|
|
|
| 234 |
# Prompt can be either a string, or a PIL image
|
| 235 |
if isinstance(prompt, PIL.Image.Image):
|
|
|
|
| 236 |
linearized_list.append(prompt)
|
|
|
|
| 237 |
elif isinstance(prompt, str) and "/tmp/gradio/" in prompt: #isinstance(prompt, PIL.Image.Image):
|
|
|
|
| 238 |
linearized_list.append(prompt)
|
|
|
|
| 239 |
elif isinstance(prompt, str) and "/tmp/gradio/" not in prompt:
|
|
|
|
| 240 |
if "<fake_token_around_image>" not in prompt:
|
|
|
|
| 241 |
linearized_list.append(prompt)
|
|
|
|
| 242 |
else:
|
|
|
|
| 243 |
prompt_splitted = prompt.split("<fake_token_around_image>")
|
|
|
|
| 244 |
for ps in prompt_splitted:
|
|
|
|
| 245 |
if ps == "":
|
| 246 |
continue
|
| 247 |
if ps.startswith("<image:"):
|
|
|
|
| 253 |
f"Unrecognized type for `prompt`. Got {type(type(prompt))}. Was expecting something in [`str`,"
|
| 254 |
" `PIL.Image.Image`]"
|
| 255 |
)
|
|
|
|
| 256 |
return linearized_list
|
| 257 |
|
| 258 |
|
|
|
|
| 281 |
Convert a user prompt in the list format (i.e. elements are either a PIL image or a string) into
|
| 282 |
the markdown format that is used for the chatbot history and rendering.
|
| 283 |
"""
|
|
|
|
|
|
|
| 284 |
resulting_string = ""
|
| 285 |
for elem in user_prompt_list:
|
|
|
|
|
|
|
| 286 |
if isinstance(elem, str):
|
| 287 |
if "/tmp/gradio/" not in elem:
|
| 288 |
resulting_string += elem
|
|
|
|
| 289 |
elif "/tmp/gradio/" in elem:
|
| 290 |
resulting_string += f"})"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
elif isinstance(elem, PIL.Image.Image): #or "/tmp/gradio/" in elem: #and "/tmp/gradio/" in elem:
|
| 292 |
resulting_string += f"})" #pil_to_markdown_im(convert_to_rgb(elem)) <---------------
|
|
|
|
| 293 |
else:
|
| 294 |
raise ValueError(
|
| 295 |
"Unknown type for `user_prompt_list`. Expected an element of type `str` or `PIL.Image.Image` and got"
|
| 296 |
f" `{type(elem)}`"
|
| 297 |
)
|
|
|
|
| 298 |
return resulting_string
|
| 299 |
|
| 300 |
|
|
|
|
| 333 |
max_memory=max_memory_map,
|
| 334 |
)
|
| 335 |
model.eval()
|
|
|
|
|
|
|
| 336 |
# TODO: the device_map looks very inefficient right now; that could be improved.
|
| 337 |
return processor, tokenizer, model
|
| 338 |
|
|
|
|
| 344 |
Produces the resulting list that needs to go inside the processor.
|
| 345 |
It handles the potential image box input, the history and the system conditionning.
|
| 346 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
resulting_list = copy.deepcopy(SYSTEM_PROMPT)
|
| 349 |
|
| 350 |
# Format history
|
| 351 |
for turn in history:
|
|
|
|
| 352 |
user_utterance, assistant_utterance = turn
|
|
|
|
| 353 |
splitted_user_utterance = split_str_on_im_markdown(user_utterance)
|
| 354 |
+
#splitted_user_utterance = [
|
| 355 |
+
# im_markdown_to_pil(s) if s.startswith('<img src="data:image/png;base64,') else s
|
| 356 |
+
# for s in splitted_user_utterance
|
| 357 |
+
# if s != ""
|
| 358 |
+
#]
|
|
|
|
|
|
|
| 359 |
|
| 360 |
if isinstance(splitted_user_utterance[0], str):
|
| 361 |
resulting_list.append("\nUser: ")
|
| 362 |
else:
|
| 363 |
resulting_list.append("\nUser:")
|
|
|
|
| 364 |
resulting_list.extend(splitted_user_utterance)
|
|
|
|
| 365 |
resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
|
|
|
|
| 366 |
|
| 367 |
|
| 368 |
# Format current input
|
| 369 |
current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
|
|
|
|
| 370 |
|
| 371 |
if current_image is None:
|
|
|
|
| 372 |
if "<img src=data:image/png;base64" in current_user_prompt_str:
|
| 373 |
raise ValueError("The UI does not support inputing via the text box an image in base64.")
|
| 374 |
current_user_prompt_list = handle_manual_images_in_user_prompt(current_user_prompt_str)
|
|
|
|
| 375 |
resulting_list.append("\nUser: ")
|
|
|
|
| 376 |
resulting_list.extend(current_user_prompt_list)
|
|
|
|
| 377 |
resulting_list.append("<end_of_utterance>\nAssistant:")
|
|
|
|
| 378 |
return resulting_list, current_user_prompt_list
|
| 379 |
else:
|
| 380 |
+
# Choosing to put the image first when the image is inputted through the UI, but this is an arbitrary choice.
|
|
|
|
| 381 |
resulting_list.extend(["\nUser:", Image.open(current_image), f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"]) #current_image
|
|
|
|
| 382 |
return resulting_list, [current_user_prompt_str]
|
| 383 |
|
| 384 |
|
|
|
|
| 800 |
penalty_alpha,
|
| 801 |
):
|
| 802 |
# global processor, model, tokenizer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 803 |
|
| 804 |
force_words = ""
|
| 805 |
hide_special_tokens = False
|
|
|
|
| 810 |
history=chat_history,
|
| 811 |
)
|
| 812 |
|
|
|
|
|
|
|
|
|
|
| 813 |
generated_text = model_generation(
|
| 814 |
prompt_list=formated_prompt_list,
|
| 815 |
processor=processor,
|
|
|
|
| 837 |
chat_history.append(
|
| 838 |
(user_prompt_list_to_markdown(user_prompt_list), generated_text.strip("<end_of_utterance>"))
|
| 839 |
)
|
|
|
|
| 840 |
else:
|
| 841 |
# Case where the image is passed through the Image Box.
|
| 842 |
# Convert the image into base64 for both passing it through the chat history and
|
|
|
|
| 847 |
generated_text.strip("<end_of_utterance>"),
|
| 848 |
)
|
| 849 |
)
|
|
|
|
| 850 |
return "", None, chat_history
|
| 851 |
|
| 852 |
|
|
|
|
| 999 |
examples=[
|
| 1000 |
["What are the armed baguettes guarding?", f"{examples_path}/example_images/baguettes_guarding_paris.png"],
|
| 1001 |
[
|
| 1002 |
+
"Can you tell me a very short story based on this image?",
|
| 1003 |
+
f"{examples_path}/example_images/chicken_on_money.png",
|
| 1004 |
+
],
|
| 1005 |
+
# ["Can you describe the image?", f"{examples_path}/example_images/bear_costume.png"],
|
| 1006 |
+
# ["What is this animal and why is it unusual?", f"{examples_path}/example_images/blue_dog.png"],
|
| 1007 |
+
# [
|
| 1008 |
+
# "What is this object and do you think it is horrifying?",
|
| 1009 |
+
# f"{examples_path}/example_images/can_horror.png",
|
| 1010 |
+
# ],
|
| 1011 |
+
# ["What is this sketch for? How would you make an argument to prove this sketch was made by Picasso himself?", f"{examples_path}/example_images/cat_sketch.png"],
|
| 1012 |
+
# ["Which celebrity does this claymation figure look like?", f"{examples_path}/example_images/kanye.jpg"],
|
| 1013 |
+
# [
|
| 1014 |
+
# "Which famous person does the person in the image look like? Could you craft an engaging narrative featuring this character from the image as the main protagonist?",
|
| 1015 |
+
# f"{examples_path}/example_images/obama-harry-potter.jpg",
|
| 1016 |
+
# ],
|
| 1017 |
+
# [
|
| 1018 |
+
# "Is there a celebrity look-alike in this image? What is happening to the person?",
|
| 1019 |
+
# f"{examples_path}/example_images/ryan-reynolds-borg.jpg",
|
| 1020 |
+
# ],
|
| 1021 |
+
# ["Can you describe this image in details please?", f"{examples_path}/example_images/dragons_playing.png"],
|
| 1022 |
+
# ["What can you tell me about the cap in this image?", f"{examples_path}/example_images/ironman_cap.png"],
|
| 1023 |
+
# [
|
| 1024 |
+
# "Can you write an advertisement for Coca-Cola based on this image?",
|
| 1025 |
+
# f"{examples_path}/example_images/polar_bear_coke.png",
|
| 1026 |
+
# ],
|
| 1027 |
+
# [
|
| 1028 |
+
# "What is the rabbit doing in this image? Do you think this image is real?",
|
| 1029 |
+
# f"{examples_path}/example_images/rabbit_force.png",
|
| 1030 |
+
# ],
|
| 1031 |
+
# ["What is happening in this image and why is it unusual?", f"{examples_path}/example_images/ramen.png"],
|
| 1032 |
+
# [
|
| 1033 |
+
# "What I should look most forward to when I visit this place?",
|
| 1034 |
+
# f"{examples_path}/example_images/tree_fortress.jpg",
|
| 1035 |
+
# ],
|
| 1036 |
+
# ["Who is the person in the image and what is he doing?", f"{examples_path}/example_images/tom-cruise-astronaut-pegasus.jpg"],
|
| 1037 |
+
# [
|
| 1038 |
+
# "What is happening in this image? Which famous personality does this person in center looks like?",
|
| 1039 |
+
# f"{examples_path}/example_images/gandhi_selfie.jpg",
|
| 1040 |
+
# ],
|
| 1041 |
+
# [
|
| 1042 |
+
# (
|
| 1043 |
+
# "<fake_token_around_image><image:https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/stable-diffusion-xl-coreml/a_high_quality_photo_of_a_surfing_dog.7667.final_float16_original.jpg><fake_token_around_image>What"
|
| 1044 |
+
# " do you think the dog is doing and is it unusual?"
|
| 1045 |
+
# ),
|
| 1046 |
+
# None,
|
| 1047 |
+
# ],
|
| 1048 |
],
|
| 1049 |
inputs=[textbox, imagebox],
|
| 1050 |
outputs=[textbox, imagebox, chatbot],
|