File size: 3,144 Bytes
d0c3c53 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
"""
Simple test to verify dataset integration setup.
This test doesn't require external libraries to be installed.
"""
import json
import os
def test_dataset_files(dataset_path=None):
    """Smoke-test the dataset integration setup.

    Checks (printing a human-readable report as it goes):
      1. the expected dataset files exist under *dataset_path*,
      2. each JSONL split parses and has the expected record structure,
      3. ``model.py`` in the current directory contains the dataset hooks,
      4. prints the dataset/model URLs and usage instructions.

    Args:
        dataset_path: Root directory of the local dataset clone. Defaults
            to ``~/huggingface.co/datasets/chiedo/hello-world`` (the
            original hard-coded location), so existing callers are
            unaffected.

    Returns:
        None. All results are reported via ``print``; missing files are
        reported, not raised.
    """
    if dataset_path is None:
        dataset_path = os.path.expanduser(
            "~/huggingface.co/datasets/chiedo/hello-world"
        )

    print("Testing Dataset Integration Setup")
    print("=" * 50)

    _check_required_files(dataset_path)
    _validate_splits(dataset_path)
    _check_model_integration()
    _print_summary()


def _check_required_files(dataset_path):
    """Report presence/absence of each expected dataset file."""
    required_files = [
        "train.jsonl",
        "validation.jsonl",
        "test.jsonl",
        "README.md",
        "hello_world.py",
    ]
    print("\n1. Checking dataset files:")
    for file in required_files:
        file_path = os.path.join(dataset_path, file)
        # NOTE: original output used mojibake "β" for both marks;
        # repaired to ✓ / ✗ from the surrounding exists/missing wording.
        if os.path.exists(file_path):
            print(f"   ✓ {file} exists")
        else:
            print(f"   ✗ {file} missing")


def _validate_splits(dataset_path):
    """Parse each JSONL split and sanity-check the first record."""
    print("\n2. Validating dataset content:")
    for split in ("train", "validation", "test"):
        file_path = os.path.join(dataset_path, f"{split}.jsonl")
        try:
            with open(file_path, "r") as f:
                lines = f.readlines()
            print(f"\n   {split} split:")
            print(f"   - Examples: {len(lines)}")
            # Only the first record is inspected; an empty file raises
            # IndexError, which is caught and reported below.
            first_example = json.loads(lines[0])
            print(f"   - First example: {first_example}")
            if "text" in first_example and "label" in first_example:
                print("   - Structure: ✓ Valid")
            else:
                print("   - Structure: ✗ Invalid")
        except Exception as e:
            # Best-effort report: a missing/corrupt split should not
            # abort the remaining checks.
            print(f"   Error reading {split}: {e}")


def _check_model_integration():
    """Grep model.py (cwd) for the expected dataset-integration hooks."""
    print("\n3. Checking model integration:")
    model_file = "model.py"
    if not os.path.exists(model_file):
        # Original code silently skipped this section when model.py was
        # absent; that behavior is preserved.
        return
    with open(model_file, "r") as f:
        content = f.read()
    checks = [
        ("load_dataset", "load_dataset method"),
        ("prepare_dataset_batch", "prepare_dataset_batch method"),
    ]
    for needle, label in checks:
        if needle in content:
            print(f"   ✓ {label} found in model.py")
        else:
            print(f"   ✗ {label} not found")
    if "from datasets import load_dataset" in content:
        print("   ✓ datasets import found in model.py")
    else:
        print("   ✗ datasets import not found")


def _print_summary():
    """Print the dataset/model URLs and follow-up instructions."""
    print("\n4. Dataset URLs:")
    print("   Model: https://huggingface.co/chiedo/hello-world")
    print("   Dataset: https://huggingface.co/datasets/chiedo/hello-world")
    print("\n" + "=" * 50)
    print("Dataset integration setup complete!")
    print("\nTo use the dataset with the model, install dependencies:")
    print("   pip install torch transformers datasets")
    print("\nThen run:")
    print("   python example_with_dataset.py")
# Script entry point: run the integration smoke-test only when executed
# directly (not when imported as a module).
if __name__ == "__main__":
    test_dataset_files()