Spaces:
Sleeping
Sleeping
ChatExplorer
/
dataset_adapters
/4d52bd9e40bac418bcc390a42ffaf0c0c1e85370628381af2608ddcbfb3a679b.py
def transform_data(data):
    """Convert a ``{'messages': [...]}`` record into a list of from/value turns.

    Each message must be a mapping with ``'role'`` and ``'content'`` keys
    (a missing key raises ``KeyError``). Roles are mapped as follows:

    * ``'system'``    -> ``{'from': 'system', ...}``, moved to the front of
      the result regardless of where it appeared in the input. Each system
      message is inserted at index 0, so several system messages end up in
      reverse input order.
    * ``'assistant'`` -> ``{'from': 'gpt', ...}``.
    * ``'input'`` with non-empty content directly after a human turn is
      merged into that turn, separated by a blank line.
    * anything else   -> ``{'from': 'human', ...}``.

    If the first turn after any leading system turns is from ``'gpt'``, an
    empty human turn is inserted before it so that the dialogue proper
    always opens with a human turn.

    Args:
        data: Mapping that may contain a ``'messages'`` list. A missing or
            ``None`` value is treated as an empty list.

    Returns:
        A new list of ``{'from': ..., 'value': ...}`` dicts.
    """
    conversations = []
    # Number of system turns; they all sit at the front because each one is
    # inserted at index 0.
    system_count = 0

    # ``or []`` also covers an explicit ``'messages': None``.
    for message in data.get('messages') or []:
        role = message['role']
        content = message['content']

        if role == 'system':
            conversations.insert(0, {'from': 'system', 'value': content})
            system_count += 1
        elif role == 'assistant':
            conversations.append({'from': 'gpt', 'value': content})
        elif (
            role == 'input'
            and content
            and conversations
            and conversations[-1]['from'] == 'human'
        ):
            # An 'input' that follows a human turn extends that turn instead
            # of starting a new one.
            conversations[-1]['value'] += '\n\n' + content
        else:
            conversations.append({'from': 'human', 'value': content})

    # Look past the leading system turns: checking only index 0 would miss a
    # system -> gpt opening.
    if (
        len(conversations) > system_count
        and conversations[system_count]['from'] == 'gpt'
    ):
        conversations.insert(system_count, {'from': 'human', 'value': ''})

    return conversations