File size: 8,428 Bytes
52b110b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Age                                                                                                                                           0\n",
      "Gender                                                                                                                                        0\n",
      "How many meals do you have a day? (number of regular occasions in a day when a significant and reasonably filling amount of food is eaten)    0\n",
      "What would best describe your diet:                                                                                                           0\n",
      "Choose all that apply: [I skip meals]                                                                                                         0\n",
      "Choose all that apply: [I experience feelings of hunger during the day]                                                                       0\n",
      "Choose all that apply: [I consult a nutritionist/dietician]                                                                                   0\n",
      "Choose all that apply: [I cook my own meals]                                                                                                  0\n",
      "What would you consider to be the main meal of YOUR day?                                                                                      0\n",
      "What does your diet mostly consist of and how is it prepared?                                                                                 0\n",
      "How many times a week do you order-in or go out to eat?                                                                                       0\n",
      "Are you allergic to any of the following? (Tick all that apply)                                                                               0\n",
      "What is your weekly food intake frequency of the following food categories: [Sweet foods]                                                     0\n",
      "What is your weekly food intake frequency of the following food categories: [Salty foods]                                                     0\n",
      "What is your weekly food intake frequency of the following food categories: [Fresh fruit]                                                     0\n",
      "What is your weekly food intake frequency of the following food categories: [Fresh vegetables]                                                0\n",
      "What is your weekly food intake frequency of the following food categories: [Oily, fried foods]                                               0\n",
      "What is your weekly food intake frequency of the following food categories: [Meat]                                                            0\n",
      "What is your weekly food intake frequency of the following food categories: [Seafood ]                                                        0\n",
      "How frequently do you consume these beverages [Tea]                                                                                           0\n",
      "How frequently do you consume these beverages [Coffee]                                                                                        0\n",
      "How frequently do you consume these beverages [Aerated (Soft) Drinks]                                                                         0\n",
      "How frequently do you consume these beverages [Fruit Juices (Fresh/Packaged)]                                                                 0\n",
      "How frequently do you consume these beverages [Dairy Beverages (Milk, Milkshakes, Smoothies, Buttermilk, etc)]                                0\n",
      "How frequently do you consume these beverages [Alcoholic Beverages]                                                                           0\n",
      "What is your water consumption like (in a day, 1 cup=250ml approx)                                                                            0\n",
      "Disease Risk                                                                                                                                  0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
    "\n",
    "# Load the data\n",
    "data = pd.read_csv('data.csv')\n",
    "\n",
    "# Check for missing values\n",
    "print(data.isnull().sum())\n",
    "\n",
    "# Encode categorical variables\n",
    "label_encoders = {}\n",
    "for column in data.select_dtypes(include=['object']).columns:\n",
    "    le = LabelEncoder()\n",
    "    data[column] = le.fit_transform(data[column])\n",
    "    label_encoders[column] = le\n",
    "\n",
    "# Split the data into features and target\n",
    "X = data.drop(columns=['Disease Risk'])\n",
    "y = data['Disease Risk']\n",
    "\n",
    "# Standardize the features\n",
    "scaler = StandardScaler()\n",
    "X_scaled = scaler.fit_transform(X)\n",
    "\n",
    "# Split the data into training and testing sets\n",
    "X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.3333333333333333\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.00      0.00      0.00         2\n",
      "           1       0.00      0.00      0.00         2\n",
      "           2       0.33      1.00      0.50         2\n",
      "\n",
      "    accuracy                           0.33         6\n",
      "   macro avg       0.11      0.33      0.17         6\n",
      "weighted avg       0.11      0.33      0.17         6\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\USER\\AppData\\Roaming\\Python\\Python312\\site-packages\\sklearn\\metrics\\_classification.py:1509: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
      "C:\\Users\\USER\\AppData\\Roaming\\Python\\Python312\\site-packages\\sklearn\\metrics\\_classification.py:1509: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
      "C:\\Users\\USER\\AppData\\Roaming\\Python\\Python312\\site-packages\\sklearn\\metrics\\_classification.py:1509: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
      "  _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import accuracy_score, classification_report\n",
    "\n",
    "# Initialize the model\n",
    "model = RandomForestClassifier(n_estimators=100, random_state=42)\n",
    "\n",
    "# Train the model\n",
    "model.fit(X_train, y_train)\n",
    "\n",
    "# Make predictions\n",
    "y_pred = model.predict(X_test)\n",
    "\n",
    "# Evaluate the model\n",
    "accuracy = accuracy_score(y_test, y_pred)\n",
    "print(f'Accuracy: {accuracy}')\n",
    "print(classification_report(y_test, y_pred))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}