zhouyik's picture
Upload folder using huggingface_hub
032e687 verified
import numpy as np
import random
from xtuner.utils import DEFAULT_IMAGE_TOKEN
import re
def llava_map_fn(example):
    """Normalize the image placeholder in a LLaVA-style conversation record.

    Leading messages whose ``'from'`` field is ``'gpt'`` are dropped. In every
    remaining ``'human'`` message that contains ``DEFAULT_IMAGE_TOKEN``, all
    occurrences of the token are removed and a single token followed by a
    newline is placed at the start of the text.

    Args:
        example: Mapping with a ``'conversations'`` entry holding a sequence of
            message dicts, each with ``'from'`` and ``'value'`` keys.

    Returns:
        The same ``example`` object, with ``'conversations'`` replaced by the
        processed message sequence. The message dicts are modified in place.
    """
    messages = example['conversations']

    # Skip leading turns from gpt. Scan by index and slice once rather than
    # re-slicing the list for every skipped message.
    first = 0
    while first < len(messages) and messages[first]['from'] == 'gpt':
        first += 1
    if first:
        messages = messages[first:]

    for msg in messages:
        if msg['from'] != 'human':
            continue
        text = msg['value']
        if DEFAULT_IMAGE_TOKEN not in text:
            continue
        # Remove every occurrence of the token, then put exactly one at the
        # front of the text.
        text = text.replace(DEFAULT_IMAGE_TOKEN, '').strip()
        # The final strip drops the trailing newline when the message held
        # nothing but the token.
        msg['value'] = (DEFAULT_IMAGE_TOKEN + '\n' + text).strip()

    example['conversations'] = messages
    return example