Spaces:
Running on Zero
Running on Zero
Update vlm.py
Browse files
vlm.py
CHANGED
|
@@ -49,39 +49,21 @@ def encode_image(image_path):
|
|
| 49 |
#
|
| 50 |
# Build messages
|
| 51 |
#
|
| 52 |
-
def build_messages(message: dict, history: list[tuple]):
|
| 53 |
"""Build messages given message & history from a **multimodal** chat interface.
|
| 54 |
Args:
|
| 55 |
message: dictionary with keys: 'text', 'files'
|
| 56 |
-
history: list of tuples
|
| 57 |
|
| 58 |
Returns:
|
| 59 |
list of messages (to be sent to the model)
|
| 60 |
"""
|
| 61 |
logger.info(f"{message=}")
|
| 62 |
logger.info(f"{history=}")
|
|
|
|
| 63 |
# Get the user's text and list of images
|
| 64 |
user_text = message.get("text", "")
|
| 65 |
user_images = message.get("files", []) # List of images
|
| 66 |
-
|
| 67 |
-
# Build the message list including history
|
| 68 |
-
messages = []
|
| 69 |
-
combined_user_input = [] # Combine images and text if found in same turn.
|
| 70 |
-
for user_turn, bot_turn in history:
|
| 71 |
-
if isinstance(user_turn, tuple): # Image input
|
| 72 |
-
image_content = [
|
| 73 |
-
{
|
| 74 |
-
"type": "image_url",
|
| 75 |
-
"image_url": f"data:image/jpeg;base64,{encode_image(image)}"
|
| 76 |
-
} for image in user_turn
|
| 77 |
-
]
|
| 78 |
-
combined_user_input.extend(image_content)
|
| 79 |
-
elif isinstance(user_turn, str): # Text input
|
| 80 |
-
combined_user_input.append({"type": "text", "text": user_turn})
|
| 81 |
-
if combined_user_input and bot_turn:
|
| 82 |
-
messages.append({'role': 'user', 'content': combined_user_input})
|
| 83 |
-
messages.append({'role': 'assistant', 'content': [{"type": "text", "text": bot_turn}]})
|
| 84 |
-
combined_user_input = [] #reset the combined user input.
|
| 85 |
|
| 86 |
# Build the user message's content from the provided message
|
| 87 |
user_content = []
|
|
@@ -94,7 +76,9 @@ def build_messages(message: dict, history: list[tuple]):
|
|
| 94 |
"image_url": f"data:image/jpeg;base64,{encode_image(image)}"
|
| 95 |
}
|
| 96 |
)
|
| 97 |
-
|
|
|
|
|
|
|
| 98 |
messages.append({'role': 'user', 'content': user_content})
|
| 99 |
logger.info(f"{messages=}")
|
| 100 |
|
|
|
|
| 49 |
#
|
| 50 |
# Build messages
|
| 51 |
#
|
| 52 |
+
def build_messages(message: dict, history: list[dict]):
|
| 53 |
"""Build messages given message & history from a **multimodal** chat interface.
|
| 54 |
Args:
|
| 55 |
message: dictionary with keys: 'text', 'files'
|
| 56 |
+
history: list of dictionaries
|
| 57 |
|
| 58 |
Returns:
|
| 59 |
list of messages (to be sent to the model)
|
| 60 |
"""
|
| 61 |
logger.info(f"{message=}")
|
| 62 |
logger.info(f"{history=}")
|
| 63 |
+
|
| 64 |
# Get the user's text and list of images
|
| 65 |
user_text = message.get("text", "")
|
| 66 |
user_images = message.get("files", []) # List of images
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
# Build the user message's content from the provided message
|
| 69 |
user_content = []
|
|
|
|
| 76 |
"image_url": f"data:image/jpeg;base64,{encode_image(image)}"
|
| 77 |
}
|
| 78 |
)
|
| 79 |
+
|
| 80 |
+
# Append to the history to create the new messages
|
| 81 |
+
messages = history
|
| 82 |
messages.append({'role': 'user', 'content': user_content})
|
| 83 |
logger.info(f"{messages=}")
|
| 84 |
|