Spaces:
Build error
Build error
"""Generate a synthetic autism-screening sample dataset and save it as CSV.

The script draws 704 records with a fixed NumPy seed: ten binary
questionnaire items (A1-A10), an age, and several categorical attributes,
all sampled independently. The ``Class`` label is ``'YES'`` when at least
three of the five scoring items (A1, A2, A4, A9, A10) are 1, otherwise
``'NO'``. The result is written to ``data/autism_screening.csv`` and a short
summary is printed.
"""
from pathlib import Path

import numpy as np
import pandas as pd

# Fixed seed so every run produces the same dataset.
np.random.seed(42)
n_samples = 704

output_path = Path('data') / 'autism_screening.csv'

# Binary (0/1) questionnaire items, in the order their values are drawn.
question_columns = [
    'A1_prefer_detail_not_big_picture',
    'A2_must_have_sameness',
    'A3_prefer_reading_systematically',
    'A4_feel_anxious_in_social',
    'A5_prefer_talking_one_to_one',
    'A6_notice_small_changes',
    'A7_trouble_focus_on_changing',
    'A8_often_daydream',
    'A9_focused_on_one_topic',
    'A10_difficult_small_talk',
]

# Items whose sum decides the class label.
scoring_columns = [
    'A1_prefer_detail_not_big_picture',
    'A2_must_have_sameness',
    'A4_feel_anxious_in_social',
    'A9_focused_on_one_topic',
    'A10_difficult_small_talk',
]

# NOTE: with a fixed seed the generated values depend on the order of the
# random draws below, so keep the statement order stable.
data = {name: np.random.randint(0, 2, n_samples) for name in question_columns}
data['age'] = np.random.randint(18, 80, n_samples)  # upper bound is exclusive
data['gender'] = np.random.choice(['M', 'F'], n_samples)
data['ethnicity'] = np.random.choice(
    ['White', 'Asian', 'Black', 'Others'], n_samples
)
data['jundice'] = np.random.choice(['yes', 'no'], n_samples)
data['autism_family_member'] = np.random.choice(['yes', 'no'], n_samples)
data['country'] = np.random.choice(['USA', 'UK', 'Canada', 'India'], n_samples)
data['used_app_before'] = np.random.choice(['yes', 'no'], n_samples)
data['screening_type'] = np.random.choice(
    ['Questionnaire', 'Interview'], n_samples
)

# Label a record 'YES' when at least three of the five scoring items are 1.
autism_score = sum(data[name] for name in scoring_columns)
data['Class'] = np.where(autism_score >= 3, 'YES', 'NO').tolist()

df = pd.DataFrame(data)

# to_csv raises OSError if the target directory is missing, so create it first.
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

print('✅ Sample dataset created!')
print(f' Records: {len(df)}')
print(f' Features: {len(df.columns)}')
print(f' Saved to: {output_path.as_posix()}')
print('\nClass Distribution:')
print(df['Class'].value_counts())