| from xtuner.utils import DEFAULT_IMAGE_TOKEN | |
def solo_data_llava_map_fn(example):
    """Convert a LLaVA/SOLO-style record into a list of input/output turns.

    Each message in ``example['conversations']`` names its speaker under
    ``'from'`` and/or ``'role'`` and carries its text under ``'value'``.
    Consecutive user messages are concatenated and paired with the next
    assistant message.

    Args:
        example: Mapping with a ``'conversations'`` sequence of message dicts.

    Returns:
        ``{'conversation': [{'input': str, 'output': str}, ...]}``.
        User text that is never followed by an assistant message is dropped.

    Raises:
        NotImplementedError: If a message has a ``'from'``/``'role'`` key but
            names a speaker that is neither a known user nor assistant.

    Note:
        User messages containing ``DEFAULT_IMAGE_TOKEN`` are rewritten in
        place so the token leads the text.
    """
    user_senders = ('human', 'user')
    # 'assistnat' is a misspelling carried over from the previous
    # implementation; presumably it tolerates a typo present in some
    # dataset -- verify before removing.
    assistant_senders = ('gpt', 'model', 'assistant', 'assistnat')

    messages = example['conversations']

    # Skip leading messages sent by 'gpt': there is no user prompt to pair
    # them with. ``.get`` keeps this safe for messages that only use 'role'.
    start = 0
    while start < len(messages) and messages[start].get('from') == 'gpt':
        start += 1

    conversation = []
    pending = []  # user text collected since the last assistant reply
    for msg in messages[start:]:
        # For SOLO data: Image SFT pairs and language data.
        if 'from' not in msg and 'role' not in msg:
            # No speaker information at all: the message is ignored.
            continue

        # Either key may be missing (or None); never index them directly.
        sender = msg.get('from')
        role = msg.get('role')

        if sender in user_senders or role == 'user':
            text = msg['value']
            if DEFAULT_IMAGE_TOKEN in text:
                # Move the image token to the front, on its own line.
                text = text.replace(DEFAULT_IMAGE_TOKEN, '').strip()
                text = (DEFAULT_IMAGE_TOKEN + '\n' + text).strip()
                msg['value'] = text
            pending.append(text)
        elif sender in assistant_senders or role == 'assistant':
            conversation.append({
                'input': ''.join(pending),
                'output': msg['value'],
            })
            pending = []
        else:
            raise NotImplementedError

    return {'conversation': conversation}