| import os | |
| from datasets import load_dataset | |
# Directory passed to `load_dataset` as its `cache_dir` argument.
CACHE_DIR = 'cache'
# Maximum number of records returned by `load_data`.
N_SAMPLES = 15
def load_data():
    """Return the first ``N_SAMPLES`` rows of the commit-rewriting dataset.

    Loads the ``train`` split of ``petrtsv-jb/commit-rewriting-samples``
    via ``load_dataset``, using ``CACHE_DIR`` as the cache directory. The
    access token is read from the ``HF_REWRITING_TOKEN`` environment
    variable; if the variable is unset, ``None`` is passed as the token.

    Returns:
        list[dict]: One dict per row, keyed by column name, in dataset
        order, truncated to the first ``N_SAMPLES`` rows.
    """
    dataset = load_dataset(
        "petrtsv-jb/commit-rewriting-samples",
        split="train",
        token=os.environ.get('HF_REWRITING_TOKEN'),
        cache_dir=CACHE_DIR,
    )
    frame = dataset.to_pandas()
    # Slice first so only the rows that are kept get converted to dicts,
    # instead of materializing every row and discarding most of them.
    return frame.iloc[:N_SAMPLES].to_dict('records')