"""Print a short preview of the first few examples of a Hugging Face dataset.

The dataset is opened in streaming mode and only the first ``NUM_EXAMPLES``
records are pulled from the stream before the script finishes.
"""

from datasets import load_dataset

# 1. Name of the dataset repository on the Hugging Face Hub.
DATASET_NAME = "Alibaba-Apsara/Superior-Reasoning-SFT-gpt-oss-120b"

# Number of examples to display, and how many leading characters of each
# text field to print.
NUM_EXAMPLES = 3
PREVIEW_CHARS = 200

print("Loading Stage 1 of the dataset... this might take a minute.")

# 2. Open the "stage1" configuration's train split with streaming=True so
#    records are iterated on demand rather than loading everything up front.
dataset = load_dataset(DATASET_NAME, name="stage1", split="train", streaming=True)

# 3. Take the first few examples to see what they look like.
#    NOTE(review): slicing assumes the 'input' and 'output' fields are
#    strings -- confirm against the dataset card.
for index, example in enumerate(dataset.take(NUM_EXAMPLES), start=1):
    print(f"\n--- EXAMPLE {index} ---")
    print(f"DOMAIN: {example['domain']}")
    # The trailing "..." is printed unconditionally, even when the field is
    # shorter than PREVIEW_CHARS.
    print(f"INPUT: {example['input'][:PREVIEW_CHARS]}...")
    print(f"REASONING/OUTPUT: {example['output'][:PREVIEW_CHARS]}...")