# hello-world / dataset_integration_test.py
# Chiedo John
# Add dataset integration to Hello World model
# d0c3c53
"""
Simple test to verify dataset integration setup.
This test doesn't require external libraries to be installed.
"""
import json
import os
# Status glyphs are written as escapes so the source stays ASCII-only and
# cannot be corrupted again by a wrong-encoding round trip.
_CHECK_MARK = "\u2713"  # U+2713 CHECK MARK
_CROSS_MARK = "\u2717"  # U+2717 BALLOT X


def _report_required_files(dataset_path, required_files):
    """Print one exists/missing line for each expected dataset file."""
    for name in required_files:
        if os.path.exists(os.path.join(dataset_path, name)):
            print(f" {_CHECK_MARK} {name} exists")
        else:
            print(f" {_CROSS_MARK} {name} missing")


def _report_split(dataset_path, split):
    """Print the example count and first-record structure of ``<split>.jsonl``."""
    file_path = os.path.join(dataset_path, f"{split}.jsonl")
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            # Blank lines (e.g. a trailing newline) are not examples.
            lines = [line for line in f if line.strip()]
    except (OSError, UnicodeDecodeError) as e:
        print(f" Error reading {split}: {e}")
        return

    print(f"\n {split} split:")
    print(f" - Examples: {len(lines)}")

    if not lines:
        # Nothing to parse; report it instead of failing on lines[0].
        print(f" - Structure: {_CROSS_MARK} Invalid (no examples)")
        return

    try:
        first_example = json.loads(lines[0])
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print(f" Error reading {split}: {e}")
        return

    print(f" - First example: {first_example}")
    # Only a JSON object can carry the two fields; without the dict check a
    # JSON string would be tested by substring instead.
    has_fields = (
        isinstance(first_example, dict)
        and "text" in first_example
        and "label" in first_example
    )
    if has_fields:
        print(f" - Structure: {_CHECK_MARK} Valid")
    else:
        print(f" - Structure: {_CROSS_MARK} Invalid")


def _report_model_integration(model_file):
    """Print whether *model_file* mentions the dataset integration hooks."""
    try:
        with open(model_file, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        print(f" {_CROSS_MARK} {model_file} not found")
        return
    except (OSError, UnicodeDecodeError) as e:
        print(f" Error reading {model_file}: {e}")
        return

    # (needle, message when present, message when absent)
    checks = (
        (
            "load_dataset",
            "load_dataset method found in model.py",
            "load_dataset method not found",
        ),
        (
            "prepare_dataset_batch",
            "prepare_dataset_batch method found in model.py",
            "prepare_dataset_batch method not found",
        ),
        (
            "from datasets import load_dataset",
            "datasets import found in model.py",
            "datasets import not found",
        ),
    )
    for needle, found_msg, missing_msg in checks:
        if needle in content:
            print(f" {_CHECK_MARK} {found_msg}")
        else:
            print(f" {_CROSS_MARK} {missing_msg}")


def test_dataset_files() -> None:
    """Test that dataset files exist and are properly formatted.

    Prints a four-part report to stdout:

    1. whether each expected file exists under
       ``~/huggingface.co/datasets/chiedo/hello-world``;
    2. for each of the train/validation/test splits, the number of
       non-blank lines and whether the first record is a JSON object with
       ``text`` and ``label`` keys;
    3. whether ``model.py`` in the current working directory contains the
       dataset integration names;
    4. the model and dataset URLs, followed by usage hints.

    This is a report only: missing or unreadable files are printed as
    problems, nothing is asserted, and the function returns ``None``.
    """
    dataset_path = os.path.expanduser("~/huggingface.co/datasets/chiedo/hello-world")
    print("Testing Dataset Integration Setup")
    print("=" * 50)

    # Check dataset files exist
    required_files = [
        "train.jsonl",
        "validation.jsonl",
        "test.jsonl",
        "README.md",
        "hello_world.py",
    ]
    print("\n1. Checking dataset files:")
    _report_required_files(dataset_path, required_files)

    # Load and validate dataset content
    print("\n2. Validating dataset content:")
    for split in ("train", "validation", "test"):
        _report_split(dataset_path, split)

    # Check model integration code (path is relative to the working directory)
    print("\n3. Checking model integration:")
    _report_model_integration("model.py")

    print("\n4. Dataset URLs:")
    print(" Model: https://huggingface.co/chiedo/hello-world")
    print(" Dataset: https://huggingface.co/datasets/chiedo/hello-world")
    print("\n" + "=" * 50)
    print("Dataset integration setup complete!")
    print("\nTo use the dataset with the model, install dependencies:")
    print(" pip install torch transformers datasets")
    print("\nThen run:")
    print(" python example_with_dataset.py")
# Run the report when this file is executed directly as a script.
if __name__ == "__main__":
    test_dataset_files()