|
|
|
|
|
""" |
|
|
Check the actual fields in the dataset. |
|
|
""" |
|
|
|
|
|
from datasets import load_dataset |
|
|
|
|
|
# Maximum number of characters shown for a string field.
_PREVIEW_CHARS = 200


def _print_sample(sample, *, show_keys=False):
    """Print every field of one dataset record in a compact form.

    Args:
        sample: Mapping of column name to value for a single record.
        show_keys: When true, print the record's key view before the fields.
    """
    if show_keys:
        print(f"Keys: {sample.keys()}")

    for key, value in sample.items():
        if isinstance(value, str):
            # Slicing already returns the whole string when it is shorter
            # than the limit, so no explicit length check is needed.
            print(f"\n{key}: {value[:_PREVIEW_CHARS]}")
        elif isinstance(value, list):
            print(f"\n{key} (list with {len(value)} items):")
            if value:
                print(f" First item: {value[0]}")
        else:
            print(f"\n{key}: {value}")


def main() -> None:
    """Load the dataset and print its schema plus two sample records.

    Prints the number of samples, the feature description, and the column
    names, then dumps the record at index 0 (including its keys) and the
    record at index 100. A record whose index is beyond the end of the
    dataset is reported as skipped instead of raising ``IndexError``.
    """
    print("Loading dataset...")
    dataset = load_dataset("allenai/Dolci-Instruct-SFT-Tool-Use", split="train")

    total = len(dataset)
    print(f"Total samples: {total}")
    print(f"\nDataset features: {dataset.features}")
    print(f"\nColumn names: {dataset.column_names}")

    # (header, record index, whether to also print the record's keys)
    previews = (
        ("\n=== First sample ===", 0, True),
        ("\n\n=== Sample 100 ===", 100, False),
    )
    for header, index, show_keys in previews:
        print(header)
        if index >= total:
            # Guard against a split that is shorter than expected.
            print(f"(skipped: dataset has only {total} samples)")
            continue
        _print_sample(dataset[index], show_keys=show_keys)
|
|
|
|
|
if __name__ == "__main__": |
|
|
main() |
|
|
|