"""Preview the first few records of a Hugging Face dataset via streaming.

The script opens the ``stage1`` configuration of the dataset in streaming
mode and prints a truncated view of the ``domain``, ``input`` and ``output``
fields of the first few training examples.
"""

from datasets import load_dataset

# Name of the dataset repository on the Hugging Face Hub.
DATASET_NAME = "Alibaba-Apsara/Superior-Reasoning-SFT-gpt-oss-120b"

# How many examples to print, and how many leading characters of each long
# text field to show.
NUM_EXAMPLES = 3
PREVIEW_CHARS = 200


def main():
    """Stream the dataset and print a short preview of its first examples."""
    print("Loading Stage 1 of the dataset... this might take a minute.")

    # streaming=True iterates over the data lazily instead of downloading the
    # whole dataset up front, which keeps memory and disk usage small.
    dataset = load_dataset(
        DATASET_NAME,
        name="stage1",
        split="train",
        streaming=True,
    )

    # Only pull the first few examples to see what the records look like.
    for i, example in enumerate(dataset.take(NUM_EXAMPLES)):
        print(f"\n--- EXAMPLE {i+1} ---")
        print(f"DOMAIN: {example['domain']}")
        # Long text fields are cut to their first PREVIEW_CHARS characters.
        print(f"INPUT: {example['input'][:PREVIEW_CHARS]}...")
        print(f"REASONING/OUTPUT: {example['output'][:PREVIEW_CHARS]}...")


if __name__ == "__main__":
    main()