Spaces:
Sleeping
Sleeping
| # Tokenization/pretraining/instruction_formatter.py | |
class InstructionFormatter:
    """Normalizes instruction-tuning samples into a fixed three-field dict."""

    @staticmethod
    def format_sample(sample):
        """Return a cleaned copy of *sample* with exactly three string fields.

        This is a placeholder; customize as needed for your data.

        Args:
            sample: A mapping (anything with ``.get``) that may contain the
                keys ``"instruction"``, ``"input"`` and ``"output"``.

        Returns:
            A new dict with the keys ``"instruction"``, ``"input"`` and
            ``"output"``. Each value is a string with leading and trailing
            whitespace removed. A field that is missing or ``None`` becomes
            ``""``; a non-string value is converted with ``str()`` first.
        """
        formatted = {}
        for field in ("instruction", "input", "output"):
            value = sample.get(field)
            if value is None:
                # Covers both an absent key and an explicit None value; a
                # plain ``.get(field, "")`` would only cover the former.
                value = ""
            elif not isinstance(value, str):
                # Tolerate numeric or other scalar fields rather than
                # failing on the ``.strip()`` call below.
                value = str(value)
            formatted[field] = value.strip()
        return formatted