autism-screening / create_sample_data.py
harshith1411's picture
Upload 10 files
90bbde0 verified
from pathlib import Path

import numpy as np
import pandas as pd
# Create a synthetic sample autism screening dataset and save it as CSV.
# All values are drawn at random; the fixed seed makes every run produce the
# same file.
np.random.seed(42)
n_samples = 704
output_path = 'data/autism_screening.csv'

# Features based on typical autism screening questionnaires.
# NOTE: the order of the entries below is also the order of the random draws,
# so reordering or inserting entries changes the generated values.
data = {
    # Ten binary (0/1) questionnaire answers.
    'A1_prefer_detail_not_big_picture': np.random.randint(0, 2, n_samples),
    'A2_must_have_sameness': np.random.randint(0, 2, n_samples),
    'A3_prefer_reading_systematically': np.random.randint(0, 2, n_samples),
    'A4_feel_anxious_in_social': np.random.randint(0, 2, n_samples),
    'A5_prefer_talking_one_to_one': np.random.randint(0, 2, n_samples),
    'A6_notice_small_changes': np.random.randint(0, 2, n_samples),
    'A7_trouble_focus_on_changing': np.random.randint(0, 2, n_samples),
    'A8_often_daydream': np.random.randint(0, 2, n_samples),
    'A9_focused_on_one_topic': np.random.randint(0, 2, n_samples),
    'A10_difficult_small_talk': np.random.randint(0, 2, n_samples),
    # Integer ages in [18, 79]; randint's upper bound is exclusive.
    'age': np.random.randint(18, 80, n_samples),
    'gender': np.random.choice(['M', 'F'], n_samples),
    'ethnicity': np.random.choice(['White', 'Asian', 'Black', 'Others'], n_samples),
    # Spelling kept as-is: this key is the column name written to the CSV.
    # NOTE(review): presumably downstream readers expect 'jundice' - confirm
    # before renaming.
    'jundice': np.random.choice(['yes', 'no'], n_samples),
    'autism_family_member': np.random.choice(['yes', 'no'], n_samples),
    'country': np.random.choice(['USA', 'UK', 'Canada', 'India'], n_samples),
    'used_app_before': np.random.choice(['yes', 'no'], n_samples),
    'screening_type': np.random.choice(['Questionnaire', 'Interview'], n_samples),
}

# The label is derived from only five of the ten answers: a row is 'YES' when
# at least `score_threshold` of these five items are 1, otherwise 'NO'.
score_columns = [
    'A1_prefer_detail_not_big_picture',
    'A2_must_have_sameness',
    'A4_feel_anxious_in_social',
    'A9_focused_on_one_topic',
    'A10_difficult_small_talk',
]
score_threshold = 3
autism_score = sum(data[column] for column in score_columns)
class_binary = (autism_score >= score_threshold).astype(int)
data['Class'] = np.where(class_binary == 1, 'YES', 'NO')

df = pd.DataFrame(data)

# to_csv does not create missing directories, so make sure the target folder
# exists; otherwise the script fails on a fresh checkout without 'data/'.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

print('✅ Sample dataset created!')
print(f' Records: {len(df)}')
print(f' Features: {len(df.columns)}')
print(f' Saved to: {output_path}')
print('\nClass Distribution:')
print(df['Class'].value_counts())