Spaces:
Sleeping
Sleeping
File size: 744 Bytes
06e7bdc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | import json
from transformers import AutoTokenizer
from mlx_lm_lora.trainer.datasets import load_local_dataset
from types import SimpleNamespace
from pathlib import Path
# Reproduction script: load a tokenizer, then load the local DPO dataset
# through mlx_lm_lora, printing a progress line before each step so the
# step that stalls can be identified from the output.

# flush=True on every progress line: when stdout is redirected to a file or
# pipe it is block-buffered, so without an explicit flush the message printed
# right before a hang would never reach the log.
print("Loading tokenizer...", flush=True)
tokenizer = AutoTokenizer.from_pretrained("mlx-community/Ministral-3-3B-Instruct-2512-4bit")

# Attribute bag handed to load_local_dataset in place of a full config object.
# NOTE(review): the *_feature values are presumably the record keys the loader
# reads from each dataset row -- confirm against mlx_lm_lora.
config = SimpleNamespace(
    train_mode="dpo",
    prompt_feature="prompt",
    system_feature="system",
    chosen_feature="chosen",
    rejected_feature="rejected",
    mask_prompt=False,
)

dpo_path = Path("data/dpo")
print("Loading DPO dataset (this is where it hangs in mlx_lm_lora)...", flush=True)
# The third element (test split) is unpacked to match the loader's return
# shape but is not inspected below.
train, valid, test = load_local_dataset(dpo_path, tokenizer, config)
print(f"Loaded train={len(train)}, valid={len(valid)}", flush=True)

# Guard the sample lookup so an empty train split is reported instead of
# raising IndexError after the load has already succeeded.
if len(train) > 0:
    print("Sample 0:", train[0], flush=True)
else:
    print("Sample 0: <train split is empty>", flush=True)
|