# File size: 708 Bytes (revision 5418f20)
from datasets import load_dataset
# Preview script: stream a few records from a Hugging Face dataset and print
# a truncated view of each one.

# 1. Define the name of the dataset from Hugging Face
DATASET_NAME = "Alibaba-Apsara/Superior-Reasoning-SFT-gpt-oss-120b"

# How many examples to show, and how many characters of each text field to print.
NUM_EXAMPLES = 3
PREVIEW_CHARS = 200

print("Loading Stage 1 of the dataset... this might take a minute.")

# 2. Load the "stage1" config's train split in streaming mode, so records are
#    iterated lazily rather than loading the whole split before the first one
#    can be read.
dataset = load_dataset(DATASET_NAME, name="stage1", split="train", streaming=True)

# 3. Take the first few examples to see what they look like.
#    NOTE(review): assumes every record has string 'domain', 'input' and
#    'output' fields — confirm against the dataset card.
for i, example in enumerate(dataset.take(NUM_EXAMPLES), start=1):
    print(f"\n--- EXAMPLE {i} ---")
    print(f"DOMAIN: {example['domain']}")
    # Only the first PREVIEW_CHARS characters are shown; "..." is always appended.
    print(f"INPUT: {example['input'][:PREVIEW_CHARS]}...")
    print(f"REASONING/OUTPUT: {example['output'][:PREVIEW_CHARS]}...")