Dama12 committed on
Commit
e9899bd
·
1 Parent(s): a844eb4

Ajouter des dépendances

Browse files
Files changed (6) hide show
  1. allinone.py +0 -882
  2. app.py +882 -162
  3. requirements.txt +1 -4
  4. shared.py +0 -6
  5. styles.css +0 -12
  6. tips.csv +0 -245
allinone.py CHANGED
@@ -1,882 +0,0 @@
1
- <<<<<<< HEAD
2
- import pandas as pd
3
- import numpy as np
4
- from xgboost import XGBClassifier
5
- from lightgbm import LGBMClassifier
6
- from sklearn.ensemble import RandomForestClassifier
7
- from sklearn.linear_model import LogisticRegression
8
- from sklearn.svm import SVC
9
- from sklearn.preprocessing import StandardScaler, LabelEncoder
10
- from sklearn.model_selection import StratifiedKFold
11
- from sklearn.metrics import classification_report, recall_score, f1_score
12
- from sklearn.impute import SimpleImputer
13
- from imblearn.over_sampling import SMOTE
14
- from imblearn.under_sampling import RandomUnderSampler
15
- from imblearn.pipeline import Pipeline
16
- import joblib
17
- from flask import Flask, request, jsonify
18
- from flask_cors import CORS
19
- import os
20
- import warnings
21
- import time
22
- from tqdm import tqdm
23
- import threading
24
- import logging
25
- from tenacity import retry, wait_fixed, stop_after_attempt
26
-
27
- warnings.filterwarnings('ignore', category=UserWarning)
28
- os.environ["LOKY_MAX_CPU_COUNT"] = "1"
29
-
30
- logging.basicConfig(level=logging.INFO)
31
- logger = logging.getLogger(__name__)
32
-
33
- app = Flask(__name__)
34
- CORS(app)
35
-
36
- NEW_DATA_FILE = 'new_data.csv'
37
- DATASET_PATH = "my_datasheet_80000.csv"
38
- MIN_NEW_SAMPLES_FOR_RETRAIN = 100
39
-
40
- # Feature sets for each task
41
- PRIORITY_FEATURES = [
42
- 'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'PA', 'Temperature', 'SpO2_Severity', 'Tachypnea', 'Bradypnea',
43
- 'Tachycardia', 'Bradycardia', 'Critical_Signs', 'SpO2_Temp_Ratio', 'Pouls_PA_Ratio', 'Temp_Pouls_Ratio',
44
- 'SpO2_PA_Diff', 'SpO2_Temp_Diff', 'PA_Pouls_Diff', 'SpO2_Log', 'Temp_Squared', 'Suggested_Priority'
45
- ]
46
-
47
- SERVICE_FEATURES = [
48
- 'Age', 'Sexe', 'Enceinte', 'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'ECG', 'PA', 'Temperature', 'IMC',
49
- 'Age_Category', 'Temp_Anomaly', 'PA_High', 'PA_Low', 'Pouls_SpO2_Ratio', 'PA_Temp_Ratio', 'IMC_Temp_Ratio'
50
- ]
51
-
52
- priority_model = None
53
- service_model = None
54
- priority_scaler = None
55
- service_scaler = None
56
- priority_imputer = None
57
- service_imputer = None
58
- label_encoder_service = LabelEncoder()
59
-
60
- model_lock = threading.Lock()
61
-
62
def enhanced_features(df):
    """Add the engineered vital-sign columns used by the priority and service models.

    The frame is modified in place (new columns are assigned onto ``df``) and
    the same object is returned, so callers may keep writing
    ``df = enhanced_features(df)``.

    Args:
        df: DataFrame holding the numeric columns 'Age', 'SpO2',
            'Frquce_Rprtr(rpm)', 'Pouls', 'PA', 'Temperature' and 'IMC'.

    Returns:
        ``df`` itself, with the abnormal-sign flags, ratios, differences,
        transforms and binned categories added.
    """
    age = df['Age']
    resp_rate = df['Frquce_Rprtr(rpm)']
    pulse = df['Pouls']
    spo2 = df['SpO2']
    temperature = df['Temperature']
    pressure = df['PA']

    # Mutually exclusive age bands. The previous chained "or" let the
    # "Age < 12" clause also match infants, which silently lowered the infant
    # tachypnea/tachycardia limits from 40 rpm / 160 bpm to 30 rpm / 120 bpm.
    infant = age < 1
    child = (age >= 1) & (age < 12)
    older = age >= 12

    def _banded(series, compare, infant_limit, child_limit, older_limit):
        """Flag rows where ``series`` crosses the limit of its age band."""
        hit = ((infant & compare(series, infant_limit))
               | (child & compare(series, child_limit))
               | (older & compare(series, older_limit)))
        return hit.astype(int)

    def _above(series, limit):
        return series > limit

    def _below(series, limit):
        return series < limit

    df['Tachypnea'] = _banded(resp_rate, _above, 40, 30, 20)
    df['Bradypnea'] = _banded(resp_rate, _below, 20, 12, 8)
    df['Tachycardia'] = _banded(pulse, _above, 160, 120, 100)
    df['Bradycardia'] = _banded(pulse, _below, 90, 70, 50)

    # The small epsilon keeps the ratios finite when a denominator is zero.
    df['SpO2_Temp_Ratio'] = spo2 / (temperature + 1e-6)
    df['Pouls_PA_Ratio'] = pulse / (pressure + 1e-6)
    df['Temp_Pouls_Ratio'] = temperature / (pulse + 1e-6)
    df['SpO2_PA_Diff'] = spo2 - pressure / 10
    df['SpO2_Temp_Diff'] = spo2 - temperature
    df['PA_Pouls_Diff'] = pressure - pulse
    df['IMC_Temp_Ratio'] = df['IMC'] / (temperature + 1e-6)
    df['SpO2_Log'] = np.log1p(spo2)
    df['Temp_Squared'] = temperature ** 2
    df['Pouls_SpO2_Ratio'] = pulse / (spo2 + 1e-6)
    df['PA_Temp_Ratio'] = pressure / (temperature + 1e-6)

    # include_lowest=True keeps Age == 0 in the first bin instead of turning
    # it into NaN (pd.cut intervals are right-closed by default).
    df['Age_Category'] = pd.cut(age, bins=[0, 1, 12, 45, 65, 120],
                                labels=[0, 1, 2, 3, 4], include_lowest=True)
    df['Temp_Anomaly'] = ((temperature < 35) | (temperature > 38)).astype(int)
    df['PA_High'] = (pressure > 160).astype(int)
    df['PA_Low'] = (pressure < 90).astype(int)
    # Lower saturation maps to a higher severity label (3 = most severe bin).
    df['SpO2_Severity'] = pd.cut(spo2, bins=[0, 85, 90, 92, 100], labels=[3, 2, 1, 0])
    df['Critical_Signs'] = ((spo2 < 85) | (pulse > 150) | (temperature > 40) |
                            (pressure > 200) | (pressure < 70)).astype(int)
    return df
94
-
95
def compute_service_and_priority(row):
    """Derive the rule-based target service and triage priority for one patient.

    Args:
        row: Mapping-like record (dict or DataFrame row) exposing 'Age',
            'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'ECG', 'PA', 'Temperature',
            'Enceinte' and 'IMC'.

    Returns:
        A ``(service, priorite)`` tuple: the service name as a string and the
        priority as an int from 1 (first rule tier) to 5 (no rule matched).
    """
    patient_age = row['Age']
    oxygen = row['SpO2']
    breaths = row['Frquce_Rprtr(rpm)']
    pulse = row['Pouls']
    ecg_flag = row['ECG']
    pressure = row['PA']
    fever = row['Temperature']
    pregnant = row['Enceinte']
    bmi = row['IMC']

    def _route():
        # Rules are ordered: the first one that matches decides the service.
        if patient_age <= 18:
            return 'Pédiatriques'
        if pregnant:
            return 'Gynécologie/Obstétrique'
        if ecg_flag == 1 or pulse < 50 or pulse > 110 or breaths > 20:
            return 'Neurologie'
        if oxygen < 92 or breaths > 18 or pulse > 100 or pressure < 90 or pressure > 160:
            return 'Cardiorespiratoire'
        if (bmi > 30 and 38 < fever <= 40 and 70 <= pulse <= 90) or \
                (70 <= pulse <= 90 and 110 <= pressure <= 130 and oxygen >= 97 and fever <= 37.5):
            return 'Médecine générale'
        if fever > 40:
            return 'Radiothérapie'
        return 'Chirurgie'

    def _triage():
        # (level, SpO2 floor, temperature ceiling, pulse ceiling,
        #  PA floor, PA ceiling, respiratory-rate ceiling or None)
        tiers = (
            (1, 85, 40, 150, 70, 200, None),
            (2, 88, 39.5, 130, 80, 180, 25),
            (3, 90, 38.5, 110, 90, 160, 20),
            (4, 92, 38, 100, 100, 140, 18),
        )
        for level, o2_min, temp_max, pulse_max, pa_min, pa_max, rr_max in tiers:
            if (oxygen < o2_min or fever > temp_max or pulse > pulse_max
                    or pressure < pa_min or pressure > pa_max
                    or (rr_max is not None and breaths > rr_max)):
                return level
        return 5

    service = _route()
    priorite = _triage()
    return service, priorite
134
-
135
def get_smote_strategy(y, max_samples=1000):
    """Build a per-class target-size mapping for SMOTE over-sampling.

    Each class is aimed at twice its current size, with a floor of 100
    samples and a ceiling of ``max_samples``. A class whose current size
    already meets or exceeds that target is left out of the mapping, because
    an over-sampler cannot shrink a class.

    Args:
        y: Iterable of class labels.
        max_samples: Upper bound for any requested class size.

    Returns:
        Dict mapping class label -> desired sample count (int). Empty when
        ``y`` is empty or no class needs over-sampling.
    """
    class_counts = pd.Series(y).value_counts()
    strategy = {}
    for cls, count in class_counts.items():
        count = int(count)
        target = min(max_samples, max(count * 2, 100))  # Ensure reasonable class sizes
        # The original computed ``target`` but never stored it, so the
        # returned strategy was always empty.
        if target > count:
            strategy[cls] = target
    return strategy
141
-
142
def train_priority_model():
    """Benchmark the candidate classifiers for triage priority and keep the best.

    Loads DATASET_PATH, encodes the categorical columns, derives the
    engineered and rule-based columns, then median-imputes and standardises
    PRIORITY_FEATURES. Every candidate is scored with 5-fold stratified
    cross-validation; each training fold is resampled (majority
    under-sampling followed by SMOTE) before fitting. The candidate with the
    highest sum of macro-F1 and recall on class 0 (priority 1) is refit on
    the full scaled data, published in the module-level ``priority_model``
    under ``model_lock`` and written to disk together with the scaler and
    imputer.

    Raises:
        Exception: any failure is logged and re-raised unchanged.
    """
    global priority_model, priority_scaler, priority_imputer
    try:
        dataset = pd.read_csv(DATASET_PATH)
        dataset['Sexe'] = dataset['Sexe'].map({'Masculin': 0, 'Feminin': 1})
        dataset['Enceinte'] = dataset['Enceinte'].astype(int)
        dataset['ECG'] = dataset['ECG'].map({'Normal': 0, 'Anormal': 1})
        dataset = enhanced_features(dataset)
        suggestion_columns = ['Suggested_Service', 'Suggested_Priority']
        dataset[suggestion_columns] = dataset.apply(
            compute_service_and_priority, axis=1, result_type='expand')
        dataset['Suggested_Priority'] = dataset['Suggested_Priority'].astype(int)

        feature_frame = dataset[PRIORITY_FEATURES]
        labels = dataset['Priorite'].values - 1  # shift labels down by one (0-based)

        priority_imputer = SimpleImputer(strategy='median')
        imputed = priority_imputer.fit_transform(feature_frame)
        priority_scaler = StandardScaler()
        scaled = priority_scaler.fit_transform(imputed)

        candidates = {
            'XGBoost': XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05,
                                     n_jobs=-1, random_state=42),
            'LightGBM': LGBMClassifier(n_estimators=100, max_depth=2, learning_rate=0.05,
                                       min_child_samples=5, reg_alpha=0.5, reg_lambda=0.5,
                                       n_jobs=-1, random_state=42, verbose=-1),
            'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=8,
                                                   n_jobs=-1, random_state=42),
            'LogisticRegression': LogisticRegression(max_iter=1000, multi_class='multinomial',
                                                     random_state=42),
            'SVM': SVC(probability=True, random_state=42),
        }

        splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        summary = {}

        for label, estimator in candidates.items():
            logger.info(f"\nEvaluating {label} for Priority...")
            fold_f1, fold_recall, fold_seconds = [], [], []
            for fit_rows, eval_rows in tqdm(splitter.split(scaled, labels), total=5):
                X_fit, X_eval = scaled[fit_rows], scaled[eval_rows]
                y_fit, y_eval = labels[fit_rows], labels[eval_rows]

                # SMOTE needs fewer neighbours than the smallest class has samples.
                smallest_class = pd.Series(y_fit).value_counts().min()
                neighbours = min(5, max(1, smallest_class - 1))
                resampler = Pipeline([
                    ('under', RandomUnderSampler(sampling_strategy='majority', random_state=42)),
                    ('over', SMOTE(sampling_strategy=get_smote_strategy(y_fit),
                                   random_state=42, k_neighbors=neighbours)),
                ])
                X_balanced, y_balanced = resampler.fit_resample(X_fit, y_fit)
                balanced_sizes = pd.Series(y_balanced).value_counts().to_dict()
                logger.info(f"{label} - Resampled class sizes: {balanced_sizes}")

                started = time.time()
                estimator.fit(X_balanced, y_balanced)
                elapsed = time.time() - started

                predicted = estimator.predict(X_eval)
                fold_f1.append(f1_score(y_eval, predicted, average='macro'))
                fold_recall.append(
                    recall_score(y_eval, predicted, labels=[0], average=None, zero_division=0)[0])
                fold_seconds.append(elapsed)
                logger.info(f"{label} Fold - F1: {fold_f1[-1]:.3f}, Recall P1: {fold_recall[-1]:.3f}")

            summary[label] = {
                'f1': np.mean(fold_f1),
                'recall_p1': np.mean(fold_recall),
                'time': np.mean(fold_seconds),
            }
            if label == 'LightGBM':
                # Importances come from the estimator as fitted on the last fold.
                importance = pd.Series(estimator.feature_importances_, index=PRIORITY_FEATURES)
                importance = importance.sort_values(ascending=False)
                logger.info(f"LightGBM Priority Feature Importance:\n{importance}")

        logger.info("\nPriority Model Comparison:")
        for label, stats in summary.items():
            logger.info(f"{label}: F1={stats['f1']:.3f}, Recall P1={stats['recall_p1']:.3f}, Time={stats['time']:.2f}s")

        def _selection_score(key):
            return summary[key]['f1'] + summary[key]['recall_p1']

        winner = max(summary, key=_selection_score)
        logger.info(f"Best Priority Model: {winner}")

        with model_lock:
            priority_model = candidates[winner]
            priority_model.fit(scaled, labels)

        stamp = int(time.time())
        joblib.dump(priority_model, f'priority_model_{stamp}.pkl')
        joblib.dump(priority_scaler, 'priority_scaler.pkl')
        joblib.dump(priority_imputer, 'priority_imputer.pkl')
        logger.info("Priority model saved.")
    except Exception as e:
        logger.error(f"Error in priority training: {e}")
        raise
228
-
229
def train_service_model():
    """Benchmark the candidate classifiers for the next-service target and keep the best.

    Loads DATASET_PATH, encodes the categorical columns, derives the
    engineered and rule-based columns, label-encodes 'Service_Suivant'
    (missing values become 'Unknown'), then median-imputes and standardises
    SERVICE_FEATURES. Every candidate is scored with 5-fold stratified
    cross-validation on macro-F1; each training fold is resampled (majority
    under-sampling followed by SMOTE) before fitting. The best candidate is
    refit on the full scaled data, published in the module-level
    ``service_model`` under ``model_lock`` and written to disk together with
    the scaler, imputer and label encoder.

    Raises:
        Exception: any failure is logged and re-raised unchanged.
    """
    global service_model, service_scaler, service_imputer, label_encoder_service
    try:
        dataset = pd.read_csv(DATASET_PATH)
        dataset['Sexe'] = dataset['Sexe'].map({'Masculin': 0, 'Feminin': 1})
        dataset['Enceinte'] = dataset['Enceinte'].astype(int)
        dataset['ECG'] = dataset['ECG'].map({'Normal': 0, 'Anormal': 1})
        dataset = enhanced_features(dataset)
        suggestion_columns = ['Suggested_Service', 'Suggested_Priority']
        dataset[suggestion_columns] = dataset.apply(
            compute_service_and_priority, axis=1, result_type='expand')

        feature_frame = dataset[SERVICE_FEATURES]
        service_names = dataset['Service_Suivant'].fillna('Unknown')
        labels = label_encoder_service.fit_transform(service_names)

        service_imputer = SimpleImputer(strategy='median')
        imputed = service_imputer.fit_transform(feature_frame)
        service_scaler = StandardScaler()
        scaled = service_scaler.fit_transform(imputed)

        candidates = {
            'XGBoost': XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05,
                                     n_jobs=-1, random_state=42),
            'LightGBM': LGBMClassifier(n_estimators=100, max_depth=2, learning_rate=0.05,
                                       min_child_samples=5, reg_alpha=0.5, reg_lambda=0.5,
                                       n_jobs=-1, random_state=42, verbose=-1),
            'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=8,
                                                   n_jobs=-1, random_state=42),
            'LogisticRegression': LogisticRegression(max_iter=1000, multi_class='multinomial',
                                                     random_state=42),
            'SVM': SVC(probability=True, random_state=42),
        }

        splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        summary = {}

        for label, estimator in candidates.items():
            logger.info(f"\nEvaluating {label} for Service...")
            fold_f1, fold_seconds = [], []
            for fit_rows, eval_rows in tqdm(splitter.split(scaled, labels), total=5):
                X_fit, X_eval = scaled[fit_rows], scaled[eval_rows]
                y_fit, y_eval = labels[fit_rows], labels[eval_rows]

                # SMOTE needs fewer neighbours than the smallest class has samples.
                smallest_class = pd.Series(y_fit).value_counts().min()
                neighbours = min(5, max(1, smallest_class - 1))
                resampler = Pipeline([
                    ('under', RandomUnderSampler(sampling_strategy='majority', random_state=42)),
                    ('over', SMOTE(sampling_strategy=get_smote_strategy(y_fit),
                                   random_state=42, k_neighbors=neighbours)),
                ])
                X_balanced, y_balanced = resampler.fit_resample(X_fit, y_fit)
                balanced_sizes = pd.Series(y_balanced).value_counts().to_dict()
                logger.info(f"{label} - Resampled class sizes: {balanced_sizes}")

                started = time.time()
                estimator.fit(X_balanced, y_balanced)
                elapsed = time.time() - started

                predicted = estimator.predict(X_eval)
                fold_f1.append(f1_score(y_eval, predicted, average='macro'))
                fold_seconds.append(elapsed)

            summary[label] = {
                'f1': np.mean(fold_f1),
                'time': np.mean(fold_seconds),
            }
            if label == 'LightGBM':
                # Importances come from the estimator as fitted on the last fold.
                importance = pd.Series(estimator.feature_importances_, index=SERVICE_FEATURES)
                importance = importance.sort_values(ascending=False)
                logger.info(f"LightGBM Service Feature Importance:\n{importance}")

        logger.info("\nService Model Comparison:")
        for label, stats in summary.items():
            logger.info(f"{label}: F1={stats['f1']:.3f}, Time={stats['time']:.2f}s")

        def _selection_score(key):
            return summary[key]['f1']

        winner = max(summary, key=_selection_score)
        logger.info(f"Best Service Model: {winner}")

        with model_lock:
            service_model = candidates[winner]
            service_model.fit(scaled, labels)

        stamp = int(time.time())
        joblib.dump(service_model, f'service_model_{stamp}.pkl')
        joblib.dump(service_scaler, 'service_scaler.pkl')
        joblib.dump(service_imputer, 'service_imputer.pkl')
        joblib.dump(label_encoder_service, 'label_encoder_service.pkl')
        logger.info("Service model saved.")
    except Exception as e:
        logger.error(f"Error in service training: {e}")
        raise
312
-
313
@retry(wait=wait_fixed(2), stop=stop_after_attempt(3))
def retrain_models():
    """Background loop: once an hour, refit both models when enough new rows exist.

    Each cycle sleeps for an hour, then checks NEW_DATA_FILE. When it holds
    at least MIN_NEW_SAMPLES_FOR_RETRAIN rows, those rows are appended to the
    original dataset, features are recomputed for the combined frame, and
    both models are refit in place under ``model_lock`` using the
    already-fitted imputers, scalers and label encoder. The refit models are
    saved with a timestamp suffix, the consumed rows are archived and removed
    from NEW_DATA_FILE.

    Errors inside a cycle are logged and the loop continues; the function is
    not expected to return.
    """
    global priority_model, service_model, priority_scaler, service_scaler, priority_imputer, service_imputer, label_encoder_service
    while True:
        time.sleep(3600)
        if not (os.path.exists(NEW_DATA_FILE) and os.path.getsize(NEW_DATA_FILE) > 0):
            continue
        try:
            new_data = pd.read_csv(NEW_DATA_FILE)
            if len(new_data) < MIN_NEW_SAMPLES_FOR_RETRAIN:
                continue

            orig_data = pd.read_csv(DATASET_PATH)
            orig_data['Sexe'] = orig_data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
            orig_data['Enceinte'] = orig_data['Enceinte'].astype(int)
            orig_data['ECG'] = orig_data['ECG'].map({'Normal': 0, 'Anormal': 1})

            # Engineer features on the COMBINED frame. Previously only the new
            # rows were processed, so every original row reached the imputer
            # with NaN in all engineered columns and in 'Suggested_Priority'.
            combined_data = pd.concat([orig_data, new_data], ignore_index=True)
            combined_data = enhanced_features(combined_data)
            combined_data[['Suggested_Service', 'Suggested_Priority']] = combined_data.apply(
                compute_service_and_priority, axis=1, result_type='expand')
            combined_data['Suggested_Priority'] = combined_data['Suggested_Priority'].astype(int)

            # Priority retraining
            X_priority = combined_data[PRIORITY_FEATURES]
            y_priority = combined_data['Priorite'].values - 1
            X_priority_imputed = priority_imputer.transform(X_priority)
            X_priority_scaled = priority_scaler.transform(X_priority_imputed)
            with model_lock:
                priority_model.fit(X_priority_scaled, y_priority)

            # Service retraining
            # NOTE(review): LabelEncoder.transform raises on labels it did not
            # see during training; such a failure is logged by the handler below.
            X_service = combined_data[SERVICE_FEATURES]
            y_service = label_encoder_service.transform(combined_data['Service_Suivant'].fillna('Unknown'))
            X_service_imputed = service_imputer.transform(X_service)
            X_service_scaled = service_scaler.transform(X_service_imputed)
            with model_lock:
                service_model.fit(X_service_scaled, y_service)

            timestamp = int(time.time())
            joblib.dump(priority_model, f'priority_model_{timestamp}.pkl')
            joblib.dump(service_model, f'service_model_{timestamp}.pkl')
            new_data.to_csv(f'archive_new_data_{timestamp}.csv', index=False)

            # Drop only the rows that were consumed. Rows appended while the
            # models were being refit are kept, and the file is never left
            # empty-but-present: an empty file would make later header-less
            # appends unreadable by read_csv.
            with model_lock:
                current = pd.read_csv(NEW_DATA_FILE)
                leftover = current.iloc[len(new_data):]
                if leftover.empty:
                    os.remove(NEW_DATA_FILE)
                else:
                    leftover.to_csv(NEW_DATA_FILE, index=False)
            logger.info("Models retrained and saved.")
        except Exception as e:
            logger.error(f"Error in retrain: {e}")
353
-
354
@app.route('/predict', methods=['POST'])
def predict():
    """Predict triage priority and next service for one patient (POST /predict).

    Expects a JSON object with the keys 'age', 'sexe', 'enceinte', 'spo2',
    'freq_resp', 'pouls', 'ecg', 'pa', 'temperature' and 'imc'. Both models
    are queried; the rule-based result replaces the model output when the
    model confidence is below 0.7, when critical signs are present
    (priority), or when the patient is pregnant (service). The final record
    is appended to NEW_DATA_FILE.

    Returns:
        JSON with 'priority', 'service_suivant', 'priority_confidence' and
        'service_confidence'. Responds 400 for a missing or malformed
        payload, 503 when the models are not loaded, 500 for any other error.
    """
    global priority_model, service_model, priority_scaler, service_scaler, priority_imputer, service_imputer, label_encoder_service
    try:
        # silent=True yields None instead of raising on a non-JSON body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        required_fields = ['age', 'sexe', 'enceinte', 'spo2', 'freq_resp', 'pouls', 'ecg', 'pa', 'temperature', 'imc']
        missing_fields = [field for field in required_fields if field not in data]
        if missing_fields:
            return jsonify({'error': f'Missing fields: {", ".join(missing_fields)}'}), 400

        # bool("false") is True, so textual flags are parsed explicitly.
        pregnant_raw = data['enceinte']
        if isinstance(pregnant_raw, str):
            pregnant = pregnant_raw.strip().lower() in ('1', 'true', 'yes', 'oui')
        else:
            pregnant = bool(pregnant_raw)

        try:
            input_data = {
                'Age': float(data['age']),
                'Sexe': 0 if data['sexe'].lower() == 'masculin' else 1,
                'Enceinte': 1 if pregnant else 0,
                'SpO2': float(data['spo2']),
                'Frquce_Rprtr(rpm)': float(data['freq_resp']),
                'Pouls': float(data['pouls']),
                'ECG': 0 if data['ecg'].lower() == 'normal' else 1,
                'PA': float(data['pa']),
                'Temperature': float(data['temperature']),
                'IMC': float(data['imc']),
            }
        except (TypeError, ValueError, AttributeError) as e:
            # Malformed values are a client error, not a server failure.
            return jsonify({'error': f'Invalid field value: {e}'}), 400

        artifacts = (priority_model, service_model, priority_scaler,
                     service_scaler, priority_imputer, service_imputer)
        if any(item is None for item in artifacts):
            return jsonify({'error': 'Models are not loaded yet'}), 503

        input_df = pd.DataFrame([input_data])
        input_df = enhanced_features(input_df)
        suggested_service, suggested_priority = compute_service_and_priority(input_df.iloc[0])
        input_df['Suggested_Priority'] = suggested_priority

        with model_lock:
            # Priority prediction
            priority_input = input_df[PRIORITY_FEATURES]
            priority_imputed = priority_imputer.transform(priority_input)
            priority_scaled = priority_scaler.transform(priority_imputed)
            priority_probs = priority_model.predict_proba(priority_scaled)[0]
            # Map the winning probability column back through classes_ rather
            # than assuming column i is label i; +1 undoes the 0-based shift
            # applied at training time.
            priority_pred = int(priority_model.classes_[np.argmax(priority_probs)]) + 1
            priority_conf = float(max(priority_probs))

            # Service prediction
            service_input = input_df[SERVICE_FEATURES]
            service_imputed = service_imputer.transform(service_input)
            service_scaled = service_scaler.transform(service_imputed)
            service_probs = service_model.predict_proba(service_scaled)[0]
            service_label = service_model.classes_[np.argmax(service_probs)]
            service_pred = label_encoder_service.inverse_transform([service_label])[0]
            service_conf = float(max(service_probs))

        is_critical = input_df['Critical_Signs'].iloc[0] == 1
        is_pregnant = input_df['Enceinte'].iloc[0] == 1

        # Fallback to rule-based logic if confidence is low or critical conditions apply
        if priority_conf < 0.7 or is_critical:
            priority_pred = suggested_priority
        if service_conf < 0.7 or is_pregnant:
            service_pred = 'Gynécologie/Obstétrique' if is_pregnant else suggested_service

        input_df['Priorite'] = priority_pred
        input_df['Service_Suivant'] = service_pred
        with model_lock:
            # Write the header whenever the file is missing OR empty, so the
            # log always stays parseable by read_csv.
            needs_header = (not os.path.exists(NEW_DATA_FILE)
                            or os.path.getsize(NEW_DATA_FILE) == 0)
            input_df.to_csv(NEW_DATA_FILE, mode='a', header=needs_header, index=False)

        logger.info(f"Predicted: service={service_pred}, priority={priority_pred}, service_conf={service_conf}, priority_conf={priority_conf}")
        return jsonify({
            'priority': int(priority_pred),
            'service_suivant': service_pred,
            'priority_confidence': priority_conf,
            'service_confidence': service_conf
        })
    except Exception as e:
        logger.error(f"Prediction error: {str(e)}")
        return jsonify({'error': str(e)}), 500
423
-
424
if __name__ == '__main__':
    import glob  # local import: only this startup path needs it

    # Leave True to retrain on every start; set to False to reuse saved artifacts.
    FORCE_RETRAIN = True

    def _saved_model_path(prefix):
        """Return the path of the saved model for ``prefix``, or None.

        The training and retraining functions write
        '<prefix>_<timestamp>.pkl', so a bare '<prefix>.pkl' normally does
        not exist. The bare name is still preferred when present; otherwise
        the most recently modified timestamped file is used.
        """
        plain = f'{prefix}.pkl'
        if os.path.exists(plain):
            return plain
        stamped = glob.glob(f'{prefix}_[0-9]*.pkl')
        return max(stamped, key=os.path.getmtime) if stamped else None

    priority_path = _saved_model_path('priority_model')
    service_path = _saved_model_path('service_model')
    support_files = ['priority_scaler.pkl', 'service_scaler.pkl', 'priority_imputer.pkl',
                     'service_imputer.pkl', 'label_encoder_service.pkl']
    artifacts_ready = (priority_path is not None and service_path is not None
                       and all(os.path.exists(path) for path in support_files))

    if FORCE_RETRAIN or not artifacts_ready:
        train_priority_model()
        train_service_model()
    else:
        # NOTE: joblib.load unpickles; only load artifacts this application wrote.
        with model_lock:
            priority_model = joblib.load(priority_path)
            service_model = joblib.load(service_path)
            priority_scaler = joblib.load('priority_scaler.pkl')
            service_scaler = joblib.load('service_scaler.pkl')
            priority_imputer = joblib.load('priority_imputer.pkl')
            service_imputer = joblib.load('service_imputer.pkl')
            label_encoder_service = joblib.load('label_encoder_service.pkl')

    # Daemon thread: it must not keep the process alive at shutdown.
    retrain_thread = threading.Thread(target=retrain_models, daemon=True)
    retrain_thread.start()
441
- =======
442
- import pandas as pd
443
- import numpy as np
444
- from xgboost import XGBClassifier
445
- from lightgbm import LGBMClassifier
446
- from sklearn.ensemble import RandomForestClassifier
447
- from sklearn.linear_model import LogisticRegression
448
- from sklearn.svm import SVC
449
- from sklearn.preprocessing import StandardScaler, LabelEncoder
450
- from sklearn.model_selection import StratifiedKFold
451
- from sklearn.metrics import classification_report, recall_score, f1_score
452
- from sklearn.impute import SimpleImputer
453
- from imblearn.over_sampling import SMOTE
454
- from imblearn.under_sampling import RandomUnderSampler
455
- from imblearn.pipeline import Pipeline
456
- import joblib
457
- from flask import Flask, request, jsonify
458
- from flask_cors import CORS
459
- import os
460
- import warnings
461
- import time
462
- from tqdm import tqdm
463
- import threading
464
- import logging
465
- from tenacity import retry, wait_fixed, stop_after_attempt
466
-
467
- warnings.filterwarnings('ignore', category=UserWarning)
468
- os.environ["LOKY_MAX_CPU_COUNT"] = "1"
469
-
470
- logging.basicConfig(level=logging.INFO)
471
- logger = logging.getLogger(__name__)
472
-
473
- app = Flask(__name__)
474
- CORS(app)
475
-
476
- NEW_DATA_FILE = 'new_data.csv'
477
- DATASET_PATH = "my_datasheet_80000.csv"
478
- MIN_NEW_SAMPLES_FOR_RETRAIN = 100
479
-
480
- # Feature sets for each task
481
- PRIORITY_FEATURES = [
482
- 'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'PA', 'Temperature', 'SpO2_Severity', 'Tachypnea', 'Bradypnea',
483
- 'Tachycardia', 'Bradycardia', 'Critical_Signs', 'SpO2_Temp_Ratio', 'Pouls_PA_Ratio', 'Temp_Pouls_Ratio',
484
- 'SpO2_PA_Diff', 'SpO2_Temp_Diff', 'PA_Pouls_Diff', 'SpO2_Log', 'Temp_Squared', 'Suggested_Priority'
485
- ]
486
-
487
- SERVICE_FEATURES = [
488
- 'Age', 'Sexe', 'Enceinte', 'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'ECG', 'PA', 'Temperature', 'IMC',
489
- 'Age_Category', 'Temp_Anomaly', 'PA_High', 'PA_Low', 'Pouls_SpO2_Ratio', 'PA_Temp_Ratio', 'IMC_Temp_Ratio'
490
- ]
491
-
492
- priority_model = None
493
- service_model = None
494
- priority_scaler = None
495
- service_scaler = None
496
- priority_imputer = None
497
- service_imputer = None
498
- label_encoder_service = LabelEncoder()
499
-
500
- model_lock = threading.Lock()
501
-
502
def enhanced_features(df):
    """Add the engineered vital-sign columns used by the priority and service models.

    The frame is modified in place (new columns are assigned onto ``df``) and
    the same object is returned, so callers may keep writing
    ``df = enhanced_features(df)``.

    Args:
        df: DataFrame holding the numeric columns 'Age', 'SpO2',
            'Frquce_Rprtr(rpm)', 'Pouls', 'PA', 'Temperature' and 'IMC'.

    Returns:
        ``df`` itself, with the abnormal-sign flags, ratios, differences,
        transforms and binned categories added.
    """
    age = df['Age']
    resp_rate = df['Frquce_Rprtr(rpm)']
    pulse = df['Pouls']
    spo2 = df['SpO2']
    temperature = df['Temperature']
    pressure = df['PA']

    # Mutually exclusive age bands. The previous chained "or" let the
    # "Age < 12" clause also match infants, which silently lowered the infant
    # tachypnea/tachycardia limits from 40 rpm / 160 bpm to 30 rpm / 120 bpm.
    infant = age < 1
    child = (age >= 1) & (age < 12)
    older = age >= 12

    def _banded(series, compare, infant_limit, child_limit, older_limit):
        """Flag rows where ``series`` crosses the limit of its age band."""
        hit = ((infant & compare(series, infant_limit))
               | (child & compare(series, child_limit))
               | (older & compare(series, older_limit)))
        return hit.astype(int)

    def _above(series, limit):
        return series > limit

    def _below(series, limit):
        return series < limit

    df['Tachypnea'] = _banded(resp_rate, _above, 40, 30, 20)
    df['Bradypnea'] = _banded(resp_rate, _below, 20, 12, 8)
    df['Tachycardia'] = _banded(pulse, _above, 160, 120, 100)
    df['Bradycardia'] = _banded(pulse, _below, 90, 70, 50)

    # The small epsilon keeps the ratios finite when a denominator is zero.
    df['SpO2_Temp_Ratio'] = spo2 / (temperature + 1e-6)
    df['Pouls_PA_Ratio'] = pulse / (pressure + 1e-6)
    df['Temp_Pouls_Ratio'] = temperature / (pulse + 1e-6)
    df['SpO2_PA_Diff'] = spo2 - pressure / 10
    df['SpO2_Temp_Diff'] = spo2 - temperature
    df['PA_Pouls_Diff'] = pressure - pulse
    df['IMC_Temp_Ratio'] = df['IMC'] / (temperature + 1e-6)
    df['SpO2_Log'] = np.log1p(spo2)
    df['Temp_Squared'] = temperature ** 2
    df['Pouls_SpO2_Ratio'] = pulse / (spo2 + 1e-6)
    df['PA_Temp_Ratio'] = pressure / (temperature + 1e-6)

    # include_lowest=True keeps Age == 0 in the first bin instead of turning
    # it into NaN (pd.cut intervals are right-closed by default).
    df['Age_Category'] = pd.cut(age, bins=[0, 1, 12, 45, 65, 120],
                                labels=[0, 1, 2, 3, 4], include_lowest=True)
    df['Temp_Anomaly'] = ((temperature < 35) | (temperature > 38)).astype(int)
    df['PA_High'] = (pressure > 160).astype(int)
    df['PA_Low'] = (pressure < 90).astype(int)
    # Lower saturation maps to a higher severity label (3 = most severe bin).
    df['SpO2_Severity'] = pd.cut(spo2, bins=[0, 85, 90, 92, 100], labels=[3, 2, 1, 0])
    df['Critical_Signs'] = ((spo2 < 85) | (pulse > 150) | (temperature > 40) |
                            (pressure > 200) | (pressure < 70)).astype(int)
    return df
534
-
535
def compute_service_and_priority(row):
    """Derive the rule-based target service and triage priority for one patient.

    Args:
        row: Mapping-like record (dict or DataFrame row) exposing 'Age',
            'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'ECG', 'PA', 'Temperature',
            'Enceinte' and 'IMC'.

    Returns:
        A ``(service, priorite)`` tuple: the service name as a string and the
        priority as an int from 1 (first rule tier) to 5 (no rule matched).
    """
    patient_age = row['Age']
    oxygen = row['SpO2']
    breaths = row['Frquce_Rprtr(rpm)']
    pulse = row['Pouls']
    ecg_flag = row['ECG']
    pressure = row['PA']
    fever = row['Temperature']
    pregnant = row['Enceinte']
    bmi = row['IMC']

    def _route():
        # Rules are ordered: the first one that matches decides the service.
        if patient_age <= 18:
            return 'Pédiatriques'
        if pregnant:
            return 'Gynécologie/Obstétrique'
        if ecg_flag == 1 or pulse < 50 or pulse > 110 or breaths > 20:
            return 'Neurologie'
        if oxygen < 92 or breaths > 18 or pulse > 100 or pressure < 90 or pressure > 160:
            return 'Cardiorespiratoire'
        if (bmi > 30 and 38 < fever <= 40 and 70 <= pulse <= 90) or \
                (70 <= pulse <= 90 and 110 <= pressure <= 130 and oxygen >= 97 and fever <= 37.5):
            return 'Médecine générale'
        if fever > 40:
            return 'Radiothérapie'
        return 'Chirurgie'

    def _triage():
        # (level, SpO2 floor, temperature ceiling, pulse ceiling,
        #  PA floor, PA ceiling, respiratory-rate ceiling or None)
        tiers = (
            (1, 85, 40, 150, 70, 200, None),
            (2, 88, 39.5, 130, 80, 180, 25),
            (3, 90, 38.5, 110, 90, 160, 20),
            (4, 92, 38, 100, 100, 140, 18),
        )
        for level, o2_min, temp_max, pulse_max, pa_min, pa_max, rr_max in tiers:
            if (oxygen < o2_min or fever > temp_max or pulse > pulse_max
                    or pressure < pa_min or pressure > pa_max
                    or (rr_max is not None and breaths > rr_max)):
                return level
        return 5

    service = _route()
    priorite = _triage()
    return service, priorite
574
-
575
def get_smote_strategy(y, max_samples=1000):
    """Build a per-class target-size mapping for SMOTE over-sampling.

    Each class is aimed at twice its current size, with a floor of 100
    samples and a ceiling of ``max_samples``. A class whose current size
    already meets or exceeds that target is left out of the mapping, because
    an over-sampler cannot shrink a class.

    Args:
        y: Iterable of class labels.
        max_samples: Upper bound for any requested class size.

    Returns:
        Dict mapping class label -> desired sample count (int). Empty when
        ``y`` is empty or no class needs over-sampling.
    """
    class_counts = pd.Series(y).value_counts()
    strategy = {}
    for cls, count in class_counts.items():
        count = int(count)
        target = min(max_samples, max(count * 2, 100))  # Ensure reasonable class sizes
        # The original computed ``target`` but never stored it, so the
        # returned strategy was always empty.
        if target > count:
            strategy[cls] = target
    return strategy
581
-
582
def train_priority_model():
    """Benchmark the candidate classifiers for triage priority and keep the best.

    Loads DATASET_PATH, encodes the categorical columns, derives the
    engineered and rule-based columns, then median-imputes and standardises
    PRIORITY_FEATURES. Every candidate is scored with 5-fold stratified
    cross-validation; each training fold is resampled (majority
    under-sampling followed by SMOTE) before fitting. The candidate with the
    highest sum of macro-F1 and recall on class 0 (priority 1) is refit on
    the full scaled data, published in the module-level ``priority_model``
    under ``model_lock`` and written to disk together with the scaler and
    imputer.

    Raises:
        Exception: any failure is logged and re-raised unchanged.
    """
    global priority_model, priority_scaler, priority_imputer
    try:
        dataset = pd.read_csv(DATASET_PATH)
        dataset['Sexe'] = dataset['Sexe'].map({'Masculin': 0, 'Feminin': 1})
        dataset['Enceinte'] = dataset['Enceinte'].astype(int)
        dataset['ECG'] = dataset['ECG'].map({'Normal': 0, 'Anormal': 1})
        dataset = enhanced_features(dataset)
        suggestion_columns = ['Suggested_Service', 'Suggested_Priority']
        dataset[suggestion_columns] = dataset.apply(
            compute_service_and_priority, axis=1, result_type='expand')
        dataset['Suggested_Priority'] = dataset['Suggested_Priority'].astype(int)

        feature_frame = dataset[PRIORITY_FEATURES]
        labels = dataset['Priorite'].values - 1  # shift labels down by one (0-based)

        priority_imputer = SimpleImputer(strategy='median')
        imputed = priority_imputer.fit_transform(feature_frame)
        priority_scaler = StandardScaler()
        scaled = priority_scaler.fit_transform(imputed)

        candidates = {
            'XGBoost': XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05,
                                     n_jobs=-1, random_state=42),
            'LightGBM': LGBMClassifier(n_estimators=100, max_depth=2, learning_rate=0.05,
                                       min_child_samples=5, reg_alpha=0.5, reg_lambda=0.5,
                                       n_jobs=-1, random_state=42, verbose=-1),
            'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=8,
                                                   n_jobs=-1, random_state=42),
            'LogisticRegression': LogisticRegression(max_iter=1000, multi_class='multinomial',
                                                     random_state=42),
            'SVM': SVC(probability=True, random_state=42),
        }

        splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        summary = {}

        for label, estimator in candidates.items():
            logger.info(f"\nEvaluating {label} for Priority...")
            fold_f1, fold_recall, fold_seconds = [], [], []
            for fit_rows, eval_rows in tqdm(splitter.split(scaled, labels), total=5):
                X_fit, X_eval = scaled[fit_rows], scaled[eval_rows]
                y_fit, y_eval = labels[fit_rows], labels[eval_rows]

                # SMOTE needs fewer neighbours than the smallest class has samples.
                smallest_class = pd.Series(y_fit).value_counts().min()
                neighbours = min(5, max(1, smallest_class - 1))
                resampler = Pipeline([
                    ('under', RandomUnderSampler(sampling_strategy='majority', random_state=42)),
                    ('over', SMOTE(sampling_strategy=get_smote_strategy(y_fit),
                                   random_state=42, k_neighbors=neighbours)),
                ])
                X_balanced, y_balanced = resampler.fit_resample(X_fit, y_fit)
                balanced_sizes = pd.Series(y_balanced).value_counts().to_dict()
                logger.info(f"{label} - Resampled class sizes: {balanced_sizes}")

                started = time.time()
                estimator.fit(X_balanced, y_balanced)
                elapsed = time.time() - started

                predicted = estimator.predict(X_eval)
                fold_f1.append(f1_score(y_eval, predicted, average='macro'))
                fold_recall.append(
                    recall_score(y_eval, predicted, labels=[0], average=None, zero_division=0)[0])
                fold_seconds.append(elapsed)
                logger.info(f"{label} Fold - F1: {fold_f1[-1]:.3f}, Recall P1: {fold_recall[-1]:.3f}")

            summary[label] = {
                'f1': np.mean(fold_f1),
                'recall_p1': np.mean(fold_recall),
                'time': np.mean(fold_seconds),
            }
            if label == 'LightGBM':
                # Importances come from the estimator as fitted on the last fold.
                importance = pd.Series(estimator.feature_importances_, index=PRIORITY_FEATURES)
                importance = importance.sort_values(ascending=False)
                logger.info(f"LightGBM Priority Feature Importance:\n{importance}")

        logger.info("\nPriority Model Comparison:")
        for label, stats in summary.items():
            logger.info(f"{label}: F1={stats['f1']:.3f}, Recall P1={stats['recall_p1']:.3f}, Time={stats['time']:.2f}s")

        def _selection_score(key):
            return summary[key]['f1'] + summary[key]['recall_p1']

        winner = max(summary, key=_selection_score)
        logger.info(f"Best Priority Model: {winner}")

        with model_lock:
            priority_model = candidates[winner]
            priority_model.fit(scaled, labels)

        stamp = int(time.time())
        joblib.dump(priority_model, f'priority_model_{stamp}.pkl')
        joblib.dump(priority_scaler, 'priority_scaler.pkl')
        joblib.dump(priority_imputer, 'priority_imputer.pkl')
        logger.info("Priority model saved.")
    except Exception as e:
        logger.error(f"Error in priority training: {e}")
        raise
668
-
669
- def train_service_model():
670
- global service_model, service_scaler, service_imputer, label_encoder_service
671
- try:
672
- data = pd.read_csv(DATASET_PATH)
673
- data['Sexe'] = data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
674
- data['Enceinte'] = data['Enceinte'].astype(int)
675
- data['ECG'] = data['ECG'].map({'Normal': 0, 'Anormal': 1})
676
- data = enhanced_features(data)
677
- data[['Suggested_Service', 'Suggested_Priority']] = data.apply(compute_service_and_priority, axis=1, result_type='expand')
678
-
679
- X = data[SERVICE_FEATURES]
680
- y = label_encoder_service.fit_transform(data['Service_Suivant'].fillna('Unknown'))
681
-
682
- service_imputer = SimpleImputer(strategy='median')
683
- X_imputed = service_imputer.fit_transform(X)
684
- service_scaler = StandardScaler()
685
- X_scaled = service_scaler.fit_transform(X_imputed)
686
-
687
- models = {
688
- 'XGBoost': XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05, n_jobs=-1, random_state=42),
689
- 'LightGBM': LGBMClassifier(n_estimators=100, max_depth=2, learning_rate=0.05, min_child_samples=5,
690
- reg_alpha=0.5, reg_lambda=0.5, n_jobs=-1, random_state=42, verbose=-1),
691
- 'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=8, n_jobs=-1, random_state=42),
692
- 'LogisticRegression': LogisticRegression(max_iter=1000, multi_class='multinomial', random_state=42),
693
- 'SVM': SVC(probability=True, random_state=42)
694
- }
695
-
696
- skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
697
- results = {}
698
-
699
- for name, model in models.items():
700
- logger.info(f"\nEvaluating {name} for Service...")
701
- scores = {'f1': [], 'time': []}
702
- for train_idx, test_idx in tqdm(skf.split(X_scaled, y), total=5):
703
- X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
704
- y_train, y_test = y[train_idx], y[test_idx]
705
-
706
- min_class_size = pd.Series(y_train).value_counts().min()
707
- k_neighbors = min(5, max(1, min_class_size - 1))
708
- pipeline = Pipeline([
709
- ('under', RandomUnderSampler(sampling_strategy='majority', random_state=42)),
710
- ('over', SMOTE(sampling_strategy=get_smote_strategy(y_train), random_state=42, k_neighbors=k_neighbors))
711
- ])
712
- X_train_res, y_train_res = pipeline.fit_resample(X_train, y_train)
713
- class_sizes = pd.Series(y_train_res).value_counts().to_dict()
714
- logger.info(f"{name} - Resampled class sizes: {class_sizes}")
715
-
716
- start_time = time.time()
717
- model.fit(X_train_res, y_train_res)
718
- train_time = time.time() - start_time
719
-
720
- y_pred = model.predict(X_test)
721
- scores['f1'].append(f1_score(y_test, y_pred, average='macro'))
722
- scores['time'].append(train_time)
723
-
724
- results[name] = {
725
- 'f1': np.mean(scores['f1']),
726
- 'time': np.mean(scores['time'])
727
- }
728
- if name == 'LightGBM':
729
- feature_importance = pd.Series(model.feature_importances_, index=SERVICE_FEATURES).sort_values(ascending=False)
730
- logger.info(f"LightGBM Service Feature Importance:\n{feature_importance}")
731
-
732
- logger.info("\nService Model Comparison:")
733
- for name, res in results.items():
734
- logger.info(f"{name}: F1={res['f1']:.3f}, Time={res['time']:.2f}s")
735
-
736
- best_model = max(results, key=lambda k: results[k]['f1'])
737
- logger.info(f"Best Service Model: {best_model}")
738
-
739
- with model_lock:
740
- service_model = models[best_model]
741
- service_model.fit(X_scaled, y)
742
-
743
- timestamp = int(time.time())
744
- joblib.dump(service_model, f'service_model_{timestamp}.pkl')
745
- joblib.dump(service_scaler, 'service_scaler.pkl')
746
- joblib.dump(service_imputer, 'service_imputer.pkl')
747
- joblib.dump(label_encoder_service, 'label_encoder_service.pkl')
748
- logger.info("Service model saved.")
749
- except Exception as e:
750
- logger.error(f"Error in service training: {e}")
751
- raise
752
-
753
- @retry(wait=wait_fixed(2), stop=stop_after_attempt(3))
754
- def retrain_models():
755
- global priority_model, service_model, priority_scaler, service_scaler, priority_imputer, service_imputer, label_encoder_service
756
- while True:
757
- time.sleep(3600)
758
- if os.path.exists(NEW_DATA_FILE) and os.path.getsize(NEW_DATA_FILE) > 0:
759
- try:
760
- new_data = pd.read_csv(NEW_DATA_FILE)
761
- if len(new_data) >= MIN_NEW_SAMPLES_FOR_RETRAIN:
762
- orig_data = pd.read_csv(DATASET_PATH)
763
- orig_data['Sexe'] = orig_data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
764
- orig_data['Enceinte'] = orig_data['Enceinte'].astype(int)
765
- orig_data['ECG'] = orig_data['ECG'].map({'Normal': 0, 'Anormal': 1})
766
- new_data = enhanced_features(new_data)
767
- combined_data = pd.concat([orig_data, new_data], ignore_index=True)
768
-
769
- # Priority retraining
770
- X_priority = combined_data[PRIORITY_FEATURES]
771
- y_priority = combined_data['Priorite'].values - 1
772
- X_priority_imputed = priority_imputer.transform(X_priority)
773
- X_priority_scaled = priority_scaler.transform(X_priority_imputed)
774
- with model_lock:
775
- priority_model.fit(X_priority_scaled, y_priority)
776
-
777
- # Service retraining
778
- X_service = combined_data[SERVICE_FEATURES]
779
- y_service = label_encoder_service.transform(combined_data['Service_Suivant'].fillna('Unknown'))
780
- X_service_imputed = service_imputer.transform(X_service)
781
- X_service_scaled = service_scaler.transform(X_service_imputed)
782
- with model_lock:
783
- service_model.fit(X_service_scaled, y_service)
784
-
785
- timestamp = int(time.time())
786
- joblib.dump(priority_model, f'priority_model_{timestamp}.pkl')
787
- joblib.dump(service_model, f'service_model_{timestamp}.pkl')
788
- new_data.to_csv(f'archive_new_data_{timestamp}.csv', index=False)
789
- open(NEW_DATA_FILE, 'w').close()
790
- logger.info("Models retrained and saved.")
791
- except Exception as e:
792
- logger.error(f"Error in retrain: {e}")
793
-
794
- @app.route('/predict', methods=['POST'])
795
- def predict():
796
- global priority_model, service_model, priority_scaler, service_scaler, priority_imputer, service_imputer, label_encoder_service
797
- try:
798
- data = request.get_json()
799
- required_fields = ['age', 'sexe', 'enceinte', 'spo2', 'freq_resp', 'pouls', 'ecg', 'pa', 'temperature', 'imc']
800
- missing_fields = [field for field in required_fields if field not in data]
801
- if missing_fields:
802
- return jsonify({'error': f'Missing fields: {", ".join(missing_fields)}'}), 400
803
-
804
- input_data = {
805
- 'Age': float(data['age']),
806
- 'Sexe': 0 if data['sexe'].lower() == 'masculin' else 1,
807
- 'Enceinte': 1 if bool(data['enceinte']) else 0,
808
- 'SpO2': float(data['spo2']),
809
- 'Frquce_Rprtr(rpm)': float(data['freq_resp']),
810
- 'Pouls': float(data['pouls']),
811
- 'ECG': 0 if data['ecg'].lower() == 'normal' else 1,
812
- 'PA': float(data['pa']),
813
- 'Temperature': float(data['temperature']),
814
- 'IMC': float(data['imc']),
815
- }
816
-
817
- input_df = pd.DataFrame([input_data])
818
- input_df = enhanced_features(input_df)
819
- suggested_service, suggested_priority = compute_service_and_priority(input_df.iloc[0])
820
- input_df['Suggested_Priority'] = suggested_priority
821
-
822
- with model_lock:
823
- # Priority prediction
824
- priority_input = input_df[PRIORITY_FEATURES]
825
- priority_imputed = priority_imputer.transform(priority_input)
826
- priority_scaled = priority_scaler.transform(priority_imputed)
827
- priority_probs = priority_model.predict_proba(priority_scaled)[0]
828
- priority_pred = np.argmax(priority_probs) + 1
829
- priority_conf = float(max(priority_probs))
830
-
831
- # Service prediction
832
- service_input = input_df[SERVICE_FEATURES]
833
- service_imputed = service_imputer.transform(service_input)
834
- service_scaled = service_scaler.transform(service_imputed)
835
- service_probs = service_model.predict_proba(service_scaled)[0]
836
- service_pred_idx = np.argmax(service_probs)
837
- service_pred = label_encoder_service.inverse_transform([service_pred_idx])[0]
838
- service_conf = float(max(service_probs))
839
-
840
- # Fallback to rule-based logic if confidence is low or critical conditions apply
841
- if priority_conf < 0.7 or input_df['Critical_Signs'][0] == 1:
842
- priority_pred = suggested_priority
843
- if service_conf < 0.7 or input_df['Enceinte'][0] == 1:
844
- service_pred = suggested_service if input_df['Enceinte'][0] == 0 else 'Gynécologie/Obstétrique'
845
-
846
- input_df['Priorite'] = priority_pred
847
- input_df['Service_Suivant'] = service_pred
848
- if not os.path.exists(NEW_DATA_FILE):
849
- input_df.to_csv(NEW_DATA_FILE, index=False)
850
- else:
851
- input_df.to_csv(NEW_DATA_FILE, mode='a', header=False, index=False)
852
-
853
- logger.info(f"Predicted: service={service_pred}, priority={priority_pred}, service_conf={service_conf}, priority_conf={priority_conf}")
854
- return jsonify({
855
- 'priority': int(priority_pred),
856
- 'service_suivant': service_pred,
857
- 'priority_confidence': priority_conf,
858
- 'service_confidence': service_conf
859
- })
860
- except Exception as e:
861
- logger.error(f"Prediction error: {str(e)}")
862
- return jsonify({'error': str(e)}), 500
863
-
864
- if __name__ == '__main__':
865
- FORCE_RETRAIN = True
866
- if FORCE_RETRAIN or not (os.path.exists('priority_model.pkl') and os.path.exists('service_model.pkl')):
867
- train_priority_model()
868
- train_service_model()
869
- else:
870
- with model_lock:
871
- priority_model = joblib.load('priority_model.pkl')
872
- service_model = joblib.load('service_model.pkl')
873
- priority_scaler = joblib.load('priority_scaler.pkl')
874
- service_scaler = joblib.load('service_scaler.pkl')
875
- priority_imputer = joblib.load('priority_imputer.pkl')
876
- service_imputer = joblib.load('service_imputer.pkl')
877
- label_encoder_service = joblib.load('label_encoder_service.pkl')
878
-
879
- retrain_thread = threading.Thread(target=retrain_models, daemon=True)
880
- retrain_thread.start()
881
- >>>>>>> 12fbcdcf1e034f735bed38d79600e83ccc29f849
882
- app.run(debug=False, host='0.0.0.0', port=5000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,162 +1,882 @@
1
- import faicons as fa
2
- import plotly.express as px
3
-
4
- # Load data and compute static values
5
- from shared import app_dir, tips
6
- from shinywidgets import render_plotly
7
-
8
- from shiny import reactive, render
9
- from shiny.express import input, ui
10
-
11
- bill_rng = (min(tips.total_bill), max(tips.total_bill))
12
-
13
- # Add page title and sidebar
14
- ui.page_opts(title="Restaurant tipping", fillable=True)
15
-
16
- with ui.sidebar(open="desktop"):
17
- ui.input_slider(
18
- "total_bill",
19
- "Bill amount",
20
- min=bill_rng[0],
21
- max=bill_rng[1],
22
- value=bill_rng,
23
- pre="$",
24
- )
25
- ui.input_checkbox_group(
26
- "time",
27
- "Food service",
28
- ["Lunch", "Dinner"],
29
- selected=["Lunch", "Dinner"],
30
- inline=True,
31
- )
32
- ui.input_action_button("reset", "Reset filter")
33
-
34
- # Add main content
35
- ICONS = {
36
- "user": fa.icon_svg("user", "regular"),
37
- "wallet": fa.icon_svg("wallet"),
38
- "currency-dollar": fa.icon_svg("dollar-sign"),
39
- "ellipsis": fa.icon_svg("ellipsis"),
40
- }
41
-
42
- with ui.layout_columns(fill=False):
43
- with ui.value_box(showcase=ICONS["user"]):
44
- "Total tippers"
45
-
46
- @render.express
47
- def total_tippers():
48
- tips_data().shape[0]
49
-
50
- with ui.value_box(showcase=ICONS["wallet"]):
51
- "Average tip"
52
-
53
- @render.express
54
- def average_tip():
55
- d = tips_data()
56
- if d.shape[0] > 0:
57
- perc = d.tip / d.total_bill
58
- f"{perc.mean():.1%}"
59
-
60
- with ui.value_box(showcase=ICONS["currency-dollar"]):
61
- "Average bill"
62
-
63
- @render.express
64
- def average_bill():
65
- d = tips_data()
66
- if d.shape[0] > 0:
67
- bill = d.total_bill.mean()
68
- f"${bill:.2f}"
69
-
70
-
71
- with ui.layout_columns(col_widths=[6, 6, 12]):
72
- with ui.card(full_screen=True):
73
- ui.card_header("Tips data")
74
-
75
- @render.data_frame
76
- def table():
77
- return render.DataGrid(tips_data())
78
-
79
- with ui.card(full_screen=True):
80
- with ui.card_header(class_="d-flex justify-content-between align-items-center"):
81
- "Total bill vs tip"
82
- with ui.popover(title="Add a color variable", placement="top"):
83
- ICONS["ellipsis"]
84
- ui.input_radio_buttons(
85
- "scatter_color",
86
- None,
87
- ["none", "sex", "smoker", "day", "time"],
88
- inline=True,
89
- )
90
-
91
- @render_plotly
92
- def scatterplot():
93
- color = input.scatter_color()
94
- return px.scatter(
95
- tips_data(),
96
- x="total_bill",
97
- y="tip",
98
- color=None if color == "none" else color,
99
- trendline="lowess",
100
- )
101
-
102
- with ui.card(full_screen=True):
103
- with ui.card_header(class_="d-flex justify-content-between align-items-center"):
104
- "Tip percentages"
105
- with ui.popover(title="Add a color variable"):
106
- ICONS["ellipsis"]
107
- ui.input_radio_buttons(
108
- "tip_perc_y",
109
- "Split by:",
110
- ["sex", "smoker", "day", "time"],
111
- selected="day",
112
- inline=True,
113
- )
114
-
115
- @render_plotly
116
- def tip_perc():
117
- from ridgeplot import ridgeplot
118
-
119
- dat = tips_data()
120
- dat["percent"] = dat.tip / dat.total_bill
121
- yvar = input.tip_perc_y()
122
- uvals = dat[yvar].unique()
123
-
124
- samples = [[dat.percent[dat[yvar] == val]] for val in uvals]
125
-
126
- plt = ridgeplot(
127
- samples=samples,
128
- labels=uvals,
129
- bandwidth=0.01,
130
- colorscale="viridis",
131
- colormode="row-index",
132
- )
133
-
134
- plt.update_layout(
135
- legend=dict(
136
- orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5
137
- )
138
- )
139
-
140
- return plt
141
-
142
-
143
- ui.include_css(app_dir / "styles.css")
144
-
145
- # --------------------------------------------------------
146
- # Reactive calculations and effects
147
- # --------------------------------------------------------
148
-
149
-
150
- @reactive.calc
151
- def tips_data():
152
- bill = input.total_bill()
153
- idx1 = tips.total_bill.between(bill[0], bill[1])
154
- idx2 = tips.time.isin(input.time())
155
- return tips[idx1 & idx2]
156
-
157
-
158
- @reactive.effect
159
- @reactive.event(input.reset)
160
- def _():
161
- ui.update_slider("total_bill", value=bill_rng)
162
- ui.update_checkbox_group("time", selected=["Lunch", "Dinner"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
+ import pandas as pd
3
+ import numpy as np
4
+ from xgboost import XGBClassifier
5
+ from lightgbm import LGBMClassifier
6
+ from sklearn.ensemble import RandomForestClassifier
7
+ from sklearn.linear_model import LogisticRegression
8
+ from sklearn.svm import SVC
9
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
10
+ from sklearn.model_selection import StratifiedKFold
11
+ from sklearn.metrics import classification_report, recall_score, f1_score
12
+ from sklearn.impute import SimpleImputer
13
+ from imblearn.over_sampling import SMOTE
14
+ from imblearn.under_sampling import RandomUnderSampler
15
+ from imblearn.pipeline import Pipeline
16
+ import joblib
17
+ from flask import Flask, request, jsonify
18
+ from flask_cors import CORS
19
+ import os
20
+ import warnings
21
+ import time
22
+ from tqdm import tqdm
23
+ import threading
24
+ import logging
25
+ from tenacity import retry, wait_fixed, stop_after_attempt
26
+
27
+ warnings.filterwarnings('ignore', category=UserWarning)
28
+ os.environ["LOKY_MAX_CPU_COUNT"] = "1"
29
+
30
+ logging.basicConfig(level=logging.INFO)
31
+ logger = logging.getLogger(__name__)
32
+
33
+ app = Flask(__name__)
34
+ CORS(app)
35
+
36
+ NEW_DATA_FILE = 'new_data.csv'
37
+ DATASET_PATH = "my_datasheet_80000.csv"
38
+ MIN_NEW_SAMPLES_FOR_RETRAIN = 100
39
+
40
+ # Feature sets for each task
41
+ PRIORITY_FEATURES = [
42
+ 'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'PA', 'Temperature', 'SpO2_Severity', 'Tachypnea', 'Bradypnea',
43
+ 'Tachycardia', 'Bradycardia', 'Critical_Signs', 'SpO2_Temp_Ratio', 'Pouls_PA_Ratio', 'Temp_Pouls_Ratio',
44
+ 'SpO2_PA_Diff', 'SpO2_Temp_Diff', 'PA_Pouls_Diff', 'SpO2_Log', 'Temp_Squared', 'Suggested_Priority'
45
+ ]
46
+
47
+ SERVICE_FEATURES = [
48
+ 'Age', 'Sexe', 'Enceinte', 'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'ECG', 'PA', 'Temperature', 'IMC',
49
+ 'Age_Category', 'Temp_Anomaly', 'PA_High', 'PA_Low', 'Pouls_SpO2_Ratio', 'PA_Temp_Ratio', 'IMC_Temp_Ratio'
50
+ ]
51
+
52
+ priority_model = None
53
+ service_model = None
54
+ priority_scaler = None
55
+ service_scaler = None
56
+ priority_imputer = None
57
+ service_imputer = None
58
+ label_encoder_service = LabelEncoder()
59
+
60
+ model_lock = threading.Lock()
61
+
62
+ def enhanced_features(df):
63
+ df['Tachypnea'] = df.apply(lambda row: 1 if (row['Age'] < 1 and row['Frquce_Rprtr(rpm)'] > 40) or
64
+ (row['Age'] < 12 and row['Frquce_Rprtr(rpm)'] > 30) or
65
+ (row['Age'] >= 12 and row['Frquce_Rprtr(rpm)'] > 20) else 0, axis=1)
66
+ df['Bradypnea'] = df.apply(lambda row: 1 if (row['Age'] < 1 and row['Frquce_Rprtr(rpm)'] < 20) or
67
+ (row['Age'] < 12 and row['Frquce_Rprtr(rpm)'] < 12) or
68
+ (row['Age'] >= 12 and row['Frquce_Rprtr(rpm)'] < 8) else 0, axis=1)
69
+ df['Tachycardia'] = df.apply(lambda row: 1 if (row['Age'] < 1 and row['Pouls'] > 160) or
70
+ (row['Age'] < 12 and row['Pouls'] > 120) or
71
+ (row['Age'] >= 12 and row['Pouls'] > 100) else 0, axis=1)
72
+ df['Bradycardia'] = df.apply(lambda row: 1 if (row['Age'] < 1 and row['Pouls'] < 90) or
73
+ (row['Age'] < 12 and row['Pouls'] < 70) or
74
+ (row['Age'] >= 12 and row['Pouls'] < 50) else 0, axis=1)
75
+ df['SpO2_Temp_Ratio'] = df['SpO2'] / (df['Temperature'] + 1e-6)
76
+ df['Pouls_PA_Ratio'] = df['Pouls'] / (df['PA'] + 1e-6)
77
+ df['Temp_Pouls_Ratio'] = df['Temperature'] / (df['Pouls'] + 1e-6)
78
+ df['SpO2_PA_Diff'] = df['SpO2'] - df['PA'] / 10
79
+ df['SpO2_Temp_Diff'] = df['SpO2'] - df['Temperature']
80
+ df['PA_Pouls_Diff'] = df['PA'] - df['Pouls']
81
+ df['IMC_Temp_Ratio'] = df['IMC'] / (df['Temperature'] + 1e-6)
82
+ df['SpO2_Log'] = np.log1p(df['SpO2'])
83
+ df['Temp_Squared'] = df['Temperature'] ** 2
84
+ df['Pouls_SpO2_Ratio'] = df['Pouls'] / (df['SpO2'] + 1e-6)
85
+ df['PA_Temp_Ratio'] = df['PA'] / (df['Temperature'] + 1e-6)
86
+ df['Age_Category'] = pd.cut(df['Age'], bins=[0, 1, 12, 45, 65, 120], labels=[0, 1, 2, 3, 4])
87
+ df['Temp_Anomaly'] = df['Temperature'].apply(lambda x: 1 if x < 35 or x > 38 else 0)
88
+ df['PA_High'] = df['PA'].apply(lambda x: 1 if x > 160 else 0)
89
+ df['PA_Low'] = df['PA'].apply(lambda x: 1 if x < 90 else 0)
90
+ df['SpO2_Severity'] = pd.cut(df['SpO2'], bins=[0, 85, 90, 92, 100], labels=[3, 2, 1, 0])
91
+ df['Critical_Signs'] = ((df['SpO2'] < 85) | (df['Pouls'] > 150) | (df['Temperature'] > 40) |
92
+ (df['PA'] > 200) | (df['PA'] < 70)).astype(int)
93
+ return df
94
+
95
+ def compute_service_and_priority(row):
96
+ age = row['Age']
97
+ spO2 = row['SpO2']
98
+ frq_resp = row['Frquce_Rprtr(rpm)']
99
+ pouls = row['Pouls']
100
+ ecg = row['ECG']
101
+ pa = row['PA']
102
+ temp = row['Temperature']
103
+ enceinte = row['Enceinte']
104
+ imc = row['IMC']
105
+
106
+ if age <= 18:
107
+ service = 'Pédiatriques'
108
+ elif enceinte:
109
+ service = 'Gynécologie/Obstétrique'
110
+ elif ecg == 1 or (pouls < 50 or pouls > 110) or (frq_resp > 20):
111
+ service = 'Neurologie'
112
+ elif spO2 < 92 or frq_resp > 18 or pouls > 100 or pa < 90 or pa > 160:
113
+ service = 'Cardiorespiratoire'
114
+ elif (imc > 30 and (temp > 38 and temp <= 40) and 70 <= pouls <= 90) or \
115
+ (70 <= pouls <= 90 and 110 <= pa <= 130 and spO2 >= 97 and temp <= 37.5):
116
+ service = 'Médecine générale'
117
+ elif temp > 40:
118
+ service = 'Radiothérapie'
119
+ else:
120
+ service = 'Chirurgie'
121
+
122
+ if spO2 < 85 or temp > 40 or pouls > 150 or pa < 70 or pa > 200:
123
+ priorite = 1
124
+ elif spO2 < 88 or temp > 39.5 or pouls > 130 or pa < 80 or pa > 180 or frq_resp > 25:
125
+ priorite = 2
126
+ elif spO2 < 90 or temp > 38.5 or pouls > 110 or pa < 90 or pa > 160 or frq_resp > 20:
127
+ priorite = 3
128
+ elif spO2 < 92 or temp > 38 or pouls > 100 or pa < 100 or pa > 140 or frq_resp > 18:
129
+ priorite = 4
130
+ else:
131
+ priorite = 5
132
+
133
+ return service, priorite
134
+
135
+ def get_smote_strategy(y, max_samples=1000):
136
+ class_counts = pd.Series(y).value_counts()
137
+ strategy = {}
138
+ for cls, count in class_counts.items():
139
+ target = min(max_samples, max(count * 2, 100)) # Ensure reasonable class sizes
140
+ return strategy
141
+
142
+ def train_priority_model():
143
+ global priority_model, priority_scaler, priority_imputer
144
+ try:
145
+ data = pd.read_csv(DATASET_PATH)
146
+ data['Sexe'] = data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
147
+ data['Enceinte'] = data['Enceinte'].astype(int)
148
+ data['ECG'] = data['ECG'].map({'Normal': 0, 'Anormal': 1})
149
+ data = enhanced_features(data)
150
+ data[['Suggested_Service', 'Suggested_Priority']] = data.apply(compute_service_and_priority, axis=1, result_type='expand')
151
+ data['Suggested_Priority'] = data['Suggested_Priority'].astype(int)
152
+
153
+ X = data[PRIORITY_FEATURES]
154
+ y = data['Priorite'].values - 1 # Shift to 0-based indexing
155
+
156
+ priority_imputer = SimpleImputer(strategy='median')
157
+ X_imputed = priority_imputer.fit_transform(X)
158
+ priority_scaler = StandardScaler()
159
+ X_scaled = priority_scaler.fit_transform(X_imputed)
160
+
161
+ models = {
162
+ 'XGBoost': XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05, n_jobs=-1, random_state=42),
163
+ 'LightGBM': LGBMClassifier(n_estimators=100, max_depth=2, learning_rate=0.05, min_child_samples=5,
164
+ reg_alpha=0.5, reg_lambda=0.5, n_jobs=-1, random_state=42, verbose=-1),
165
+ 'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=8, n_jobs=-1, random_state=42),
166
+ 'LogisticRegression': LogisticRegression(max_iter=1000, multi_class='multinomial', random_state=42),
167
+ 'SVM': SVC(probability=True, random_state=42)
168
+ }
169
+
170
+ skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
171
+ results = {}
172
+
173
+ for name, model in models.items():
174
+ logger.info(f"\nEvaluating {name} for Priority...")
175
+ scores = {'f1': [], 'recall_p1': [], 'time': []}
176
+ for train_idx, test_idx in tqdm(skf.split(X_scaled, y), total=5):
177
+ X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
178
+ y_train, y_test = y[train_idx], y[test_idx]
179
+
180
+ min_class_size = pd.Series(y_train).value_counts().min()
181
+ k_neighbors = min(5, max(1, min_class_size - 1))
182
+ pipeline = Pipeline([
183
+ ('under', RandomUnderSampler(sampling_strategy='majority', random_state=42)),
184
+ ('over', SMOTE(sampling_strategy=get_smote_strategy(y_train), random_state=42, k_neighbors=k_neighbors))
185
+ ])
186
+ X_train_res, y_train_res = pipeline.fit_resample(X_train, y_train)
187
+ class_sizes = pd.Series(y_train_res).value_counts().to_dict()
188
+ logger.info(f"{name} - Resampled class sizes: {class_sizes}")
189
+
190
+ start_time = time.time()
191
+ model.fit(X_train_res, y_train_res)
192
+ train_time = time.time() - start_time
193
+
194
+ y_pred = model.predict(X_test)
195
+ scores['f1'].append(f1_score(y_test, y_pred, average='macro'))
196
+ scores['recall_p1'].append(recall_score(y_test, y_pred, labels=[0], average=None, zero_division=0)[0])
197
+ scores['time'].append(train_time)
198
+ logger.info(f"{name} Fold - F1: {scores['f1'][-1]:.3f}, Recall P1: {scores['recall_p1'][-1]:.3f}")
199
+
200
+ results[name] = {
201
+ 'f1': np.mean(scores['f1']),
202
+ 'recall_p1': np.mean(scores['recall_p1']),
203
+ 'time': np.mean(scores['time'])
204
+ }
205
+ if name == 'LightGBM':
206
+ feature_importance = pd.Series(model.feature_importances_, index=PRIORITY_FEATURES).sort_values(ascending=False)
207
+ logger.info(f"LightGBM Priority Feature Importance:\n{feature_importance}")
208
+
209
+ logger.info("\nPriority Model Comparison:")
210
+ for name, res in results.items():
211
+ logger.info(f"{name}: F1={res['f1']:.3f}, Recall P1={res['recall_p1']:.3f}, Time={res['time']:.2f}s")
212
+
213
+ best_model = max(results, key=lambda k: results[k]['f1'] + results[k]['recall_p1'])
214
+ logger.info(f"Best Priority Model: {best_model}")
215
+
216
+ with model_lock:
217
+ priority_model = models[best_model]
218
+ priority_model.fit(X_scaled, y)
219
+
220
+ timestamp = int(time.time())
221
+ joblib.dump(priority_model, f'priority_model_{timestamp}.pkl')
222
+ joblib.dump(priority_scaler, 'priority_scaler.pkl')
223
+ joblib.dump(priority_imputer, 'priority_imputer.pkl')
224
+ logger.info("Priority model saved.")
225
+ except Exception as e:
226
+ logger.error(f"Error in priority training: {e}")
227
+ raise
228
+
229
+ def train_service_model():
230
+ global service_model, service_scaler, service_imputer, label_encoder_service
231
+ try:
232
+ data = pd.read_csv(DATASET_PATH)
233
+ data['Sexe'] = data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
234
+ data['Enceinte'] = data['Enceinte'].astype(int)
235
+ data['ECG'] = data['ECG'].map({'Normal': 0, 'Anormal': 1})
236
+ data = enhanced_features(data)
237
+ data[['Suggested_Service', 'Suggested_Priority']] = data.apply(compute_service_and_priority, axis=1, result_type='expand')
238
+
239
+ X = data[SERVICE_FEATURES]
240
+ y = label_encoder_service.fit_transform(data['Service_Suivant'].fillna('Unknown'))
241
+
242
+ service_imputer = SimpleImputer(strategy='median')
243
+ X_imputed = service_imputer.fit_transform(X)
244
+ service_scaler = StandardScaler()
245
+ X_scaled = service_scaler.fit_transform(X_imputed)
246
+
247
+ models = {
248
+ 'XGBoost': XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05, n_jobs=-1, random_state=42),
249
+ 'LightGBM': LGBMClassifier(n_estimators=100, max_depth=2, learning_rate=0.05, min_child_samples=5,
250
+ reg_alpha=0.5, reg_lambda=0.5, n_jobs=-1, random_state=42, verbose=-1),
251
+ 'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=8, n_jobs=-1, random_state=42),
252
+ 'LogisticRegression': LogisticRegression(max_iter=1000, multi_class='multinomial', random_state=42),
253
+ 'SVM': SVC(probability=True, random_state=42)
254
+ }
255
+
256
+ skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
257
+ results = {}
258
+
259
+ for name, model in models.items():
260
+ logger.info(f"\nEvaluating {name} for Service...")
261
+ scores = {'f1': [], 'time': []}
262
+ for train_idx, test_idx in tqdm(skf.split(X_scaled, y), total=5):
263
+ X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
264
+ y_train, y_test = y[train_idx], y[test_idx]
265
+
266
+ min_class_size = pd.Series(y_train).value_counts().min()
267
+ k_neighbors = min(5, max(1, min_class_size - 1))
268
+ pipeline = Pipeline([
269
+ ('under', RandomUnderSampler(sampling_strategy='majority', random_state=42)),
270
+ ('over', SMOTE(sampling_strategy=get_smote_strategy(y_train), random_state=42, k_neighbors=k_neighbors))
271
+ ])
272
+ X_train_res, y_train_res = pipeline.fit_resample(X_train, y_train)
273
+ class_sizes = pd.Series(y_train_res).value_counts().to_dict()
274
+ logger.info(f"{name} - Resampled class sizes: {class_sizes}")
275
+
276
+ start_time = time.time()
277
+ model.fit(X_train_res, y_train_res)
278
+ train_time = time.time() - start_time
279
+
280
+ y_pred = model.predict(X_test)
281
+ scores['f1'].append(f1_score(y_test, y_pred, average='macro'))
282
+ scores['time'].append(train_time)
283
+
284
+ results[name] = {
285
+ 'f1': np.mean(scores['f1']),
286
+ 'time': np.mean(scores['time'])
287
+ }
288
+ if name == 'LightGBM':
289
+ feature_importance = pd.Series(model.feature_importances_, index=SERVICE_FEATURES).sort_values(ascending=False)
290
+ logger.info(f"LightGBM Service Feature Importance:\n{feature_importance}")
291
+
292
+ logger.info("\nService Model Comparison:")
293
+ for name, res in results.items():
294
+ logger.info(f"{name}: F1={res['f1']:.3f}, Time={res['time']:.2f}s")
295
+
296
+ best_model = max(results, key=lambda k: results[k]['f1'])
297
+ logger.info(f"Best Service Model: {best_model}")
298
+
299
+ with model_lock:
300
+ service_model = models[best_model]
301
+ service_model.fit(X_scaled, y)
302
+
303
+ timestamp = int(time.time())
304
+ joblib.dump(service_model, f'service_model_{timestamp}.pkl')
305
+ joblib.dump(service_scaler, 'service_scaler.pkl')
306
+ joblib.dump(service_imputer, 'service_imputer.pkl')
307
+ joblib.dump(label_encoder_service, 'label_encoder_service.pkl')
308
+ logger.info("Service model saved.")
309
+ except Exception as e:
310
+ logger.error(f"Error in service training: {e}")
311
+ raise
312
+
313
@retry(wait=wait_fixed(2), stop=stop_after_attempt(3))
def retrain_models():
    """Periodically refit both models on the original dataset plus logged predictions.

    Loops forever: sleeps one hour, then, when NEW_DATA_FILE holds at least
    MIN_NEW_SAMPLES_FOR_RETRAIN rows, refits ``priority_model`` and
    ``service_model`` in place using the already-fitted imputers, scalers and
    label encoder. The refitted models are saved, the consumed rows are
    archived, and NEW_DATA_FILE is removed. Any exception raised during a
    cycle is logged and the loop continues with the next cycle.
    """
    global priority_model, service_model, priority_scaler, service_scaler, priority_imputer, service_imputer, label_encoder_service
    while True:
        time.sleep(3600)
        if not (os.path.exists(NEW_DATA_FILE) and os.path.getsize(NEW_DATA_FILE) > 0):
            continue
        try:
            new_data = pd.read_csv(NEW_DATA_FILE)
            if len(new_data) < MIN_NEW_SAMPLES_FOR_RETRAIN:
                continue

            orig_data = pd.read_csv(DATASET_PATH)
            orig_data['Sexe'] = orig_data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
            orig_data['Enceinte'] = orig_data['Enceinte'].astype(int)
            orig_data['ECG'] = orig_data['ECG'].map({'Normal': 0, 'Anormal': 1})
            # The original dataset needs the same engineered columns as the
            # logged rows; without them every feature of those rows is NaN
            # after the concat and gets replaced by the imputer's medians.
            orig_data = enhanced_features(orig_data)
            orig_data['Suggested_Priority'] = orig_data.apply(
                lambda row: compute_service_and_priority(row)[1], axis=1
            ).astype(int)
            new_data = enhanced_features(new_data)
            combined_data = pd.concat([orig_data, new_data], ignore_index=True)

            # Priority retraining
            X_priority = combined_data[PRIORITY_FEATURES]
            y_priority = combined_data['Priorite'].values - 1  # 0-based labels
            X_priority_imputed = priority_imputer.transform(X_priority)
            X_priority_scaled = priority_scaler.transform(X_priority_imputed)
            with model_lock:
                priority_model.fit(X_priority_scaled, y_priority)

            # Service retraining
            X_service = combined_data[SERVICE_FEATURES]
            y_service = label_encoder_service.transform(combined_data['Service_Suivant'].fillna('Unknown'))
            X_service_imputed = service_imputer.transform(X_service)
            X_service_scaled = service_scaler.transform(X_service_imputed)
            with model_lock:
                service_model.fit(X_service_scaled, y_service)

            timestamp = int(time.time())
            joblib.dump(priority_model, f'priority_model_{timestamp}.pkl')
            joblib.dump(service_model, f'service_model_{timestamp}.pkl')
            # Also refresh the fixed names that the startup code loads.
            joblib.dump(priority_model, 'priority_model.pkl')
            joblib.dump(service_model, 'service_model.pkl')
            new_data.to_csv(f'archive_new_data_{timestamp}.csv', index=False)
            # Remove rather than truncate: an existing empty file makes the
            # next append skip the header row and corrupts the following read.
            os.remove(NEW_DATA_FILE)
            logger.info("Models retrained and saved.")
        except Exception as e:
            logger.error(f"Error in retrain: {e}")
353
+
354
@app.route('/predict', methods=['POST'])
def predict():
    """Predict the triage priority and next service for one patient.

    Expects a JSON object containing every key in ``required_fields``.
    Responds with the predicted priority, the predicted service and both
    model confidences. Each model prediction is replaced by the rule-based
    suggestion when its confidence is below 0.7; the priority is also replaced
    when critical signs are flagged, and pregnant patients are always routed
    to 'Gynécologie/Obstétrique'. Every answered request is appended to
    NEW_DATA_FILE for later retraining.

    Status codes: 400 for a missing/invalid body or field, 503 when the
    models are not loaded, 500 for any other failure.
    """
    global priority_model, service_model, priority_scaler, service_scaler, priority_imputer, service_imputer, label_encoder_service

    def _as_flag(value):
        """Map JSON booleans, numbers and strings like 'false'/'non'/'0' to 0 or 1."""
        if isinstance(value, str):
            return 0 if value.strip().lower() in ('', '0', 'false', 'no', 'non', 'n', 'f') else 1
        return 1 if value else 0

    try:
        # silent=True yields None instead of raising on a missing or malformed body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        required_fields = ['age', 'sexe', 'enceinte', 'spo2', 'freq_resp', 'pouls', 'ecg', 'pa', 'temperature', 'imc']
        missing_fields = [field for field in required_fields if field not in data]
        if missing_fields:
            return jsonify({'error': f'Missing fields: {", ".join(missing_fields)}'}), 400

        try:
            input_data = {
                'Age': float(data['age']),
                'Sexe': 0 if str(data['sexe']).strip().lower() == 'masculin' else 1,
                # bool('false') is True, so strings are parsed explicitly.
                'Enceinte': _as_flag(data['enceinte']),
                'SpO2': float(data['spo2']),
                'Frquce_Rprtr(rpm)': float(data['freq_resp']),
                'Pouls': float(data['pouls']),
                'ECG': 0 if str(data['ecg']).strip().lower() == 'normal' else 1,
                'PA': float(data['pa']),
                'Temperature': float(data['temperature']),
                'IMC': float(data['imc']),
            }
        except (TypeError, ValueError) as e:
            return jsonify({'error': f'Invalid field value: {e}'}), 400

        if priority_model is None or service_model is None:
            return jsonify({'error': 'Models are not loaded yet'}), 503

        input_df = pd.DataFrame([input_data])
        input_df = enhanced_features(input_df)
        suggested_service, suggested_priority = compute_service_and_priority(input_df.iloc[0])
        input_df['Suggested_Priority'] = suggested_priority

        with model_lock:
            # Priority prediction
            priority_input = input_df[PRIORITY_FEATURES]
            priority_imputed = priority_imputer.transform(priority_input)
            priority_scaled = priority_scaler.transform(priority_imputed)
            priority_probs = priority_model.predict_proba(priority_scaled)[0]
            # predict_proba columns follow classes_, which need not be 0..n-1
            # when a class was absent from the training data.
            priority_pred = int(priority_model.classes_[np.argmax(priority_probs)]) + 1
            priority_conf = float(max(priority_probs))

            # Service prediction
            service_input = input_df[SERVICE_FEATURES]
            service_imputed = service_imputer.transform(service_input)
            service_scaled = service_scaler.transform(service_imputed)
            service_probs = service_model.predict_proba(service_scaled)[0]
            service_pred_idx = service_model.classes_[np.argmax(service_probs)]
            service_pred = label_encoder_service.inverse_transform([service_pred_idx])[0]
            service_conf = float(max(service_probs))

        # Fallback to rule-based logic if confidence is low or critical conditions apply
        if priority_conf < 0.7 or input_df['Critical_Signs'][0] == 1:
            priority_pred = suggested_priority
        if service_conf < 0.7 or input_df['Enceinte'][0] == 1:
            service_pred = suggested_service if input_df['Enceinte'][0] == 0 else 'Gynécologie/Obstétrique'

        input_df['Priorite'] = priority_pred
        input_df['Service_Suivant'] = service_pred
        # Write the header whenever the file is missing or empty, so a file
        # that was truncated elsewhere still gets column names.
        write_header = not os.path.exists(NEW_DATA_FILE) or os.path.getsize(NEW_DATA_FILE) == 0
        input_df.to_csv(NEW_DATA_FILE, mode='a', header=write_header, index=False)

        logger.info(f"Predicted: service={service_pred}, priority={priority_pred}, service_conf={service_conf}, priority_conf={priority_conf}")
        return jsonify({
            'priority': int(priority_pred),
            'service_suivant': service_pred,
            'priority_confidence': priority_conf,
            'service_confidence': service_conf
        })
    except Exception as e:
        logger.error(f"Prediction error: {str(e)}")
        return jsonify({'error': str(e)}), 500
423
+
424
if __name__ == '__main__':
    import glob

    def _latest_artifact(stem):
        """Return '<stem>.pkl' if it exists, else the newest '<stem>_*.pkl', else None."""
        stable = f'{stem}.pkl'
        if os.path.exists(stable):
            return stable
        candidates = glob.glob(f'{stem}_*.pkl')
        return max(candidates, key=os.path.getmtime) if candidates else None

    # Training on startup remains the default; set FORCE_RETRAIN=0 to reuse
    # previously saved artifacts instead.
    FORCE_RETRAIN = os.environ.get('FORCE_RETRAIN', '1').strip().lower() not in ('0', 'false', 'no')
    # The training functions save models under timestamped names, so fall
    # back to the newest of those when the fixed name is absent.
    priority_model_path = _latest_artifact('priority_model')
    service_model_path = _latest_artifact('service_model')

    if FORCE_RETRAIN or not (priority_model_path and service_model_path):
        train_priority_model()
        train_service_model()
    else:
        with model_lock:
            priority_model = joblib.load(priority_model_path)
            service_model = joblib.load(service_model_path)
            priority_scaler = joblib.load('priority_scaler.pkl')
            service_scaler = joblib.load('service_scaler.pkl')
            priority_imputer = joblib.load('priority_imputer.pkl')
            service_imputer = joblib.load('service_imputer.pkl')
            label_encoder_service = joblib.load('label_encoder_service.pkl')

    # Background retraining runs in a daemon thread.
    retrain_thread = threading.Thread(target=retrain_models, daemon=True)
    retrain_thread.start()
441
+ =======
442
+ import pandas as pd
443
+ import numpy as np
444
+ from xgboost import XGBClassifier
445
+ from lightgbm import LGBMClassifier
446
+ from sklearn.ensemble import RandomForestClassifier
447
+ from sklearn.linear_model import LogisticRegression
448
+ from sklearn.svm import SVC
449
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
450
+ from sklearn.model_selection import StratifiedKFold
451
+ from sklearn.metrics import classification_report, recall_score, f1_score
452
+ from sklearn.impute import SimpleImputer
453
+ from imblearn.over_sampling import SMOTE
454
+ from imblearn.under_sampling import RandomUnderSampler
455
+ from imblearn.pipeline import Pipeline
456
+ import joblib
457
+ from flask import Flask, request, jsonify
458
+ from flask_cors import CORS
459
+ import os
460
+ import warnings
461
+ import time
462
+ from tqdm import tqdm
463
+ import threading
464
+ import logging
465
+ from tenacity import retry, wait_fixed, stop_after_attempt
466
+
467
# --- Runtime configuration -------------------------------------------------
warnings.filterwarnings('ignore', category=UserWarning)
os.environ["LOKY_MAX_CPU_COUNT"] = "1"

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Web application -------------------------------------------------------
app = Flask(__name__)
CORS(app)

# --- Data locations and retraining threshold -------------------------------
NEW_DATA_FILE = 'new_data.csv'              # rows logged by /predict
DATASET_PATH = "my_datasheet_80000.csv"     # original training dataset
MIN_NEW_SAMPLES_FOR_RETRAIN = 100           # logged rows needed before a refit

# --- Feature sets for each task --------------------------------------------
PRIORITY_FEATURES = [
    # raw vitals
    'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'PA', 'Temperature',
    # binary / ordinal flags
    'SpO2_Severity', 'Tachypnea', 'Bradypnea', 'Tachycardia', 'Bradycardia', 'Critical_Signs',
    # ratios and differences
    'SpO2_Temp_Ratio', 'Pouls_PA_Ratio', 'Temp_Pouls_Ratio',
    'SpO2_PA_Diff', 'SpO2_Temp_Diff', 'PA_Pouls_Diff',
    # transforms and the rule-based suggestion
    'SpO2_Log', 'Temp_Squared', 'Suggested_Priority',
]

SERVICE_FEATURES = [
    # raw patient data
    'Age', 'Sexe', 'Enceinte', 'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'ECG', 'PA', 'Temperature', 'IMC',
    # engineered columns
    'Age_Category', 'Temp_Anomaly', 'PA_High', 'PA_Low',
    'Pouls_SpO2_Ratio', 'PA_Temp_Ratio', 'IMC_Temp_Ratio',
]

# --- Shared model state, filled in by training or loading ------------------
priority_model = service_model = None
priority_scaler = service_scaler = None
priority_imputer = service_imputer = None
label_encoder_service = LabelEncoder()

# Guards model fitting and prediction across threads.
model_lock = threading.Lock()
501
+
502
def enhanced_features(df):
    """Add the engineered vital-sign columns to ``df`` and return it.

    ``df`` must contain 'Age', 'Frquce_Rprtr(rpm)', 'Pouls', 'PA',
    'Temperature', 'SpO2' and 'IMC'. The columns are added in place and the
    same DataFrame is returned.

    Differences from a row-wise ``or`` chain over the age limits:
    * Age bands are mutually exclusive (<1, 1 to <12, >=12), so the child
      limits no longer override the higher infant tachypnea and tachycardia
      limits.
    * The lowest bin edge is included, so Age 0 and SpO2 0 get a category
      instead of NaN.
    * 'Age_Category' and 'SpO2_Severity' are stored as floats (NaN when the
      value falls outside the bins) rather than as categoricals.
    """
    age = df['Age']
    resp = df['Frquce_Rprtr(rpm)']
    pulse = df['Pouls']
    pa = df['PA']
    temp = df['Temperature']
    spo2 = df['SpO2']

    age_bands = [age < 1, (age >= 1) & (age < 12), age >= 12]

    def _flag(values, limits, above):
        """Return 1 where ``values`` is beyond the age-specific limit, else 0."""
        # A missing age matches no band and gets a NaN limit, which never flags.
        limit = np.select(age_bands, limits, default=np.nan)
        beyond = values > limit if above else values < limit
        return beyond.astype(int)

    df['Tachypnea'] = _flag(resp, [40, 30, 20], above=True)
    df['Bradypnea'] = _flag(resp, [20, 12, 8], above=False)
    df['Tachycardia'] = _flag(pulse, [160, 120, 100], above=True)
    df['Bradycardia'] = _flag(pulse, [90, 70, 50], above=False)

    # The 1e-6 term keeps the ratios finite when a denominator is zero.
    df['SpO2_Temp_Ratio'] = spo2 / (temp + 1e-6)
    df['Pouls_PA_Ratio'] = pulse / (pa + 1e-6)
    df['Temp_Pouls_Ratio'] = temp / (pulse + 1e-6)
    df['SpO2_PA_Diff'] = spo2 - pa / 10
    df['SpO2_Temp_Diff'] = spo2 - temp
    df['PA_Pouls_Diff'] = pa - pulse
    df['IMC_Temp_Ratio'] = df['IMC'] / (temp + 1e-6)
    df['SpO2_Log'] = np.log1p(spo2)
    df['Temp_Squared'] = temp ** 2
    df['Pouls_SpO2_Ratio'] = pulse / (spo2 + 1e-6)
    df['PA_Temp_Ratio'] = pa / (temp + 1e-6)

    df['Age_Category'] = pd.cut(
        age, bins=[0, 1, 12, 45, 65, 120], labels=[0, 1, 2, 3, 4], include_lowest=True
    ).astype(float)
    df['Temp_Anomaly'] = ((temp < 35) | (temp > 38)).astype(int)
    df['PA_High'] = (pa > 160).astype(int)
    df['PA_Low'] = (pa < 90).astype(int)
    df['SpO2_Severity'] = pd.cut(
        spo2, bins=[0, 85, 90, 92, 100], labels=[3, 2, 1, 0], include_lowest=True
    ).astype(float)
    df['Critical_Signs'] = ((spo2 < 85) | (pulse > 150) | (temp > 40) |
                            (pa > 200) | (pa < 70)).astype(int)
    return df
534
+
535
def compute_service_and_priority(row):
    """Return the rule-based ``(service, priority)`` suggestion for one patient.

    ``row`` is any mapping-like record exposing 'Age', 'SpO2',
    'Frquce_Rprtr(rpm)', 'Pouls', 'ECG', 'PA', 'Temperature', 'Enceinte' and
    'IMC'. The service comes from the first matching rule in a fixed order;
    the priority is the first level (1 = most urgent) whose limits are
    breached, or 5 when none is.
    """
    age, spo2, resp = row['Age'], row['SpO2'], row['Frquce_Rprtr(rpm)']
    pulse, ecg, pa = row['Pouls'], row['ECG'], row['PA']
    temp, pregnant, bmi = row['Temperature'], row['Enceinte'], row['IMC']

    def _service():
        # Rules are checked top to bottom; the first match wins.
        if age <= 18:
            return 'Pédiatriques'
        if pregnant:
            return 'Gynécologie/Obstétrique'
        if ecg == 1 or (pulse < 50 or pulse > 110) or (resp > 20):
            return 'Neurologie'
        if spo2 < 92 or resp > 18 or pulse > 100 or pa < 90 or pa > 160:
            return 'Cardiorespiratoire'
        if (bmi > 30 and (temp > 38 and temp <= 40) and 70 <= pulse <= 90) or \
                (70 <= pulse <= 90 and 110 <= pa <= 130 and spo2 >= 97 and temp <= 37.5):
            return 'Médecine générale'
        if temp > 40:
            return 'Radiothérapie'
        return 'Chirurgie'

    # (level, SpO2 floor, temperature ceiling, pulse ceiling, PA floor,
    #  PA ceiling, respiratory-rate ceiling); level 1 has no respiratory rule.
    tiers = (
        (1, 85, 40, 150, 70, 200, None),
        (2, 88, 39.5, 130, 80, 180, 25),
        (3, 90, 38.5, 110, 90, 160, 20),
        (4, 92, 38, 100, 100, 140, 18),
    )
    priority = 5
    for level, spo2_min, temp_max, pulse_max, pa_min, pa_max, resp_max in tiers:
        if (spo2 < spo2_min or temp > temp_max or pulse > pulse_max
                or pa < pa_min or pa > pa_max
                or (resp_max is not None and resp > resp_max)):
            priority = level
            break

    return _service(), priority
574
+
575
def get_smote_strategy(y, max_samples=1000):
    """Build a per-class target-count dict for use as a SMOTE ``sampling_strategy``.

    Each class is aimed at twice its current size, with a floor of 100 and a
    cap of ``max_samples``. A class already larger than the cap keeps its
    current size, because an over-sampling target may not be below the
    existing count.

    Args:
        y: Iterable of class labels.
        max_samples: Upper bound for the size a class is grown to.

    Returns:
        Dict mapping each class label to its target sample count; empty when
        ``y`` is empty.
    """
    class_counts = pd.Series(y).value_counts()
    strategy = {}
    for cls, count in class_counts.items():
        count = int(count)
        target = min(max_samples, max(count * 2, 100))  # Ensure reasonable class sizes
        # The target was previously computed but never stored, so the
        # function always returned an empty dict.
        strategy[cls] = max(count, target)
    return strategy
581
+
582
def train_priority_model():
    """Train, compare and persist the triage-priority classifier.

    Loads DATASET_PATH, encodes the categorical columns, adds the engineered
    and rule-based features, then evaluates five candidate classifiers with
    stratified 5-fold cross-validation on resampled training folds. The
    candidate with the highest macro-F1 plus priority-1 recall is refit on
    the full dataset and stored in the module-level ``priority_model``.

    The model is saved both under a timestamped name and as
    'priority_model.pkl', the name the startup code loads. The scaler and
    imputer are saved as well. Exceptions are logged and re-raised.
    """
    global priority_model, priority_scaler, priority_imputer
    try:
        data = pd.read_csv(DATASET_PATH)
        data['Sexe'] = data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
        data['Enceinte'] = data['Enceinte'].astype(int)
        data['ECG'] = data['ECG'].map({'Normal': 0, 'Anormal': 1})
        data = enhanced_features(data)
        data[['Suggested_Service', 'Suggested_Priority']] = data.apply(compute_service_and_priority, axis=1, result_type='expand')
        data['Suggested_Priority'] = data['Suggested_Priority'].astype(int)

        X = data[PRIORITY_FEATURES]
        y = data['Priorite'].values - 1  # Shift to 0-based indexing

        priority_imputer = SimpleImputer(strategy='median')
        X_imputed = priority_imputer.fit_transform(X)
        priority_scaler = StandardScaler()
        X_scaled = priority_scaler.fit_transform(X_imputed)

        models = {
            'XGBoost': XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05, n_jobs=-1, random_state=42),
            'LightGBM': LGBMClassifier(n_estimators=100, max_depth=2, learning_rate=0.05, min_child_samples=5,
                                       reg_alpha=0.5, reg_lambda=0.5, n_jobs=-1, random_state=42, verbose=-1),
            'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=8, n_jobs=-1, random_state=42),
            'LogisticRegression': LogisticRegression(max_iter=1000, multi_class='multinomial', random_state=42),
            'SVM': SVC(probability=True, random_state=42)
        }

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        n_folds = skf.get_n_splits()  # keeps the progress bar in sync with n_splits
        results = {}

        for name, model in models.items():
            logger.info(f"\nEvaluating {name} for Priority...")
            scores = {'f1': [], 'recall_p1': [], 'time': []}
            for train_idx, test_idx in tqdm(skf.split(X_scaled, y), total=n_folds):
                X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
                y_train, y_test = y[train_idx], y[test_idx]

                # SMOTE needs fewer neighbours than the smallest class has samples.
                min_class_size = pd.Series(y_train).value_counts().min()
                k_neighbors = min(5, max(1, min_class_size - 1))
                pipeline = Pipeline([
                    ('under', RandomUnderSampler(sampling_strategy='majority', random_state=42)),
                    ('over', SMOTE(sampling_strategy=get_smote_strategy(y_train), random_state=42, k_neighbors=k_neighbors))
                ])
                X_train_res, y_train_res = pipeline.fit_resample(X_train, y_train)
                class_sizes = pd.Series(y_train_res).value_counts().to_dict()
                logger.info(f"{name} - Resampled class sizes: {class_sizes}")

                start_time = time.time()
                model.fit(X_train_res, y_train_res)
                train_time = time.time() - start_time

                y_pred = model.predict(X_test)
                scores['f1'].append(f1_score(y_test, y_pred, average='macro'))
                # Label 0 is priority 1 after the 0-based shift.
                scores['recall_p1'].append(recall_score(y_test, y_pred, labels=[0], average=None, zero_division=0)[0])
                scores['time'].append(train_time)
                logger.info(f"{name} Fold - F1: {scores['f1'][-1]:.3f}, Recall P1: {scores['recall_p1'][-1]:.3f}")

            results[name] = {
                'f1': np.mean(scores['f1']),
                'recall_p1': np.mean(scores['recall_p1']),
                'time': np.mean(scores['time'])
            }
            if name == 'LightGBM':
                # Importances come from the model as fitted on the last fold.
                feature_importance = pd.Series(model.feature_importances_, index=PRIORITY_FEATURES).sort_values(ascending=False)
                logger.info(f"LightGBM Priority Feature Importance:\n{feature_importance}")

        logger.info("\nPriority Model Comparison:")
        for name, res in results.items():
            logger.info(f"{name}: F1={res['f1']:.3f}, Recall P1={res['recall_p1']:.3f}, Time={res['time']:.2f}s")

        best_model = max(results, key=lambda k: results[k]['f1'] + results[k]['recall_p1'])
        logger.info(f"Best Priority Model: {best_model}")

        with model_lock:
            priority_model = models[best_model]
            priority_model.fit(X_scaled, y)

        timestamp = int(time.time())
        joblib.dump(priority_model, f'priority_model_{timestamp}.pkl')
        # The startup code loads 'priority_model.pkl'; without this copy a
        # saved model could never be reused.
        joblib.dump(priority_model, 'priority_model.pkl')
        joblib.dump(priority_scaler, 'priority_scaler.pkl')
        joblib.dump(priority_imputer, 'priority_imputer.pkl')
        logger.info("Priority model saved.")
    except Exception as e:
        logger.error(f"Error in priority training: {e}")
        raise
668
+
669
def train_service_model():
    """Train, compare and persist the next-service classifier.

    Loads DATASET_PATH, encodes the categorical columns, adds the engineered
    features and label-encodes 'Service_Suivant' (missing values become
    'Unknown'). Five candidate classifiers are evaluated with stratified
    5-fold cross-validation on resampled training folds; the one with the
    highest macro-F1 is refit on the full dataset and stored in the
    module-level ``service_model``.

    The model is saved both under a timestamped name and as
    'service_model.pkl', the name the startup code loads. The scaler, imputer
    and label encoder are saved as well. Exceptions are logged and re-raised.
    """
    global service_model, service_scaler, service_imputer, label_encoder_service
    try:
        data = pd.read_csv(DATASET_PATH)
        data['Sexe'] = data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
        data['Enceinte'] = data['Enceinte'].astype(int)
        data['ECG'] = data['ECG'].map({'Normal': 0, 'Anormal': 1})
        data = enhanced_features(data)
        # The rule-based suggestions are not computed here: no column in
        # SERVICE_FEATURES uses them.

        X = data[SERVICE_FEATURES]
        y = label_encoder_service.fit_transform(data['Service_Suivant'].fillna('Unknown'))

        service_imputer = SimpleImputer(strategy='median')
        X_imputed = service_imputer.fit_transform(X)
        service_scaler = StandardScaler()
        X_scaled = service_scaler.fit_transform(X_imputed)

        models = {
            'XGBoost': XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05, n_jobs=-1, random_state=42),
            'LightGBM': LGBMClassifier(n_estimators=100, max_depth=2, learning_rate=0.05, min_child_samples=5,
                                       reg_alpha=0.5, reg_lambda=0.5, n_jobs=-1, random_state=42, verbose=-1),
            'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=8, n_jobs=-1, random_state=42),
            'LogisticRegression': LogisticRegression(max_iter=1000, multi_class='multinomial', random_state=42),
            'SVM': SVC(probability=True, random_state=42)
        }

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        n_folds = skf.get_n_splits()  # keeps the progress bar in sync with n_splits
        results = {}

        for name, model in models.items():
            logger.info(f"\nEvaluating {name} for Service...")
            scores = {'f1': [], 'time': []}
            for train_idx, test_idx in tqdm(skf.split(X_scaled, y), total=n_folds):
                X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
                y_train, y_test = y[train_idx], y[test_idx]

                # SMOTE needs fewer neighbours than the smallest class has samples.
                min_class_size = pd.Series(y_train).value_counts().min()
                k_neighbors = min(5, max(1, min_class_size - 1))
                pipeline = Pipeline([
                    ('under', RandomUnderSampler(sampling_strategy='majority', random_state=42)),
                    ('over', SMOTE(sampling_strategy=get_smote_strategy(y_train), random_state=42, k_neighbors=k_neighbors))
                ])
                X_train_res, y_train_res = pipeline.fit_resample(X_train, y_train)
                class_sizes = pd.Series(y_train_res).value_counts().to_dict()
                logger.info(f"{name} - Resampled class sizes: {class_sizes}")

                start_time = time.time()
                model.fit(X_train_res, y_train_res)
                train_time = time.time() - start_time

                y_pred = model.predict(X_test)
                scores['f1'].append(f1_score(y_test, y_pred, average='macro'))
                scores['time'].append(train_time)

            results[name] = {
                'f1': np.mean(scores['f1']),
                'time': np.mean(scores['time'])
            }
            if name == 'LightGBM':
                # Importances come from the model as fitted on the last fold.
                feature_importance = pd.Series(model.feature_importances_, index=SERVICE_FEATURES).sort_values(ascending=False)
                logger.info(f"LightGBM Service Feature Importance:\n{feature_importance}")

        logger.info("\nService Model Comparison:")
        for name, res in results.items():
            logger.info(f"{name}: F1={res['f1']:.3f}, Time={res['time']:.2f}s")

        best_model = max(results, key=lambda k: results[k]['f1'])
        logger.info(f"Best Service Model: {best_model}")

        with model_lock:
            service_model = models[best_model]
            service_model.fit(X_scaled, y)

        timestamp = int(time.time())
        joblib.dump(service_model, f'service_model_{timestamp}.pkl')
        # The startup code loads 'service_model.pkl'; without this copy a
        # saved model could never be reused.
        joblib.dump(service_model, 'service_model.pkl')
        joblib.dump(service_scaler, 'service_scaler.pkl')
        joblib.dump(service_imputer, 'service_imputer.pkl')
        joblib.dump(label_encoder_service, 'label_encoder_service.pkl')
        logger.info("Service model saved.")
    except Exception as e:
        logger.error(f"Error in service training: {e}")
        raise
752
+
753
@retry(wait=wait_fixed(2), stop=stop_after_attempt(3))
def retrain_models():
    """Periodically refit both models on the original dataset plus logged predictions.

    Loops forever: sleeps one hour, then, when NEW_DATA_FILE holds at least
    MIN_NEW_SAMPLES_FOR_RETRAIN rows, refits ``priority_model`` and
    ``service_model`` in place using the already-fitted imputers, scalers and
    label encoder. The refitted models are saved, the consumed rows are
    archived, and NEW_DATA_FILE is removed. Any exception raised during a
    cycle is logged and the loop continues with the next cycle.
    """
    global priority_model, service_model, priority_scaler, service_scaler, priority_imputer, service_imputer, label_encoder_service
    while True:
        time.sleep(3600)
        if not (os.path.exists(NEW_DATA_FILE) and os.path.getsize(NEW_DATA_FILE) > 0):
            continue
        try:
            new_data = pd.read_csv(NEW_DATA_FILE)
            if len(new_data) < MIN_NEW_SAMPLES_FOR_RETRAIN:
                continue

            orig_data = pd.read_csv(DATASET_PATH)
            orig_data['Sexe'] = orig_data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
            orig_data['Enceinte'] = orig_data['Enceinte'].astype(int)
            orig_data['ECG'] = orig_data['ECG'].map({'Normal': 0, 'Anormal': 1})
            # The original dataset needs the same engineered columns as the
            # logged rows; without them every feature of those rows is NaN
            # after the concat and gets replaced by the imputer's medians.
            orig_data = enhanced_features(orig_data)
            orig_data['Suggested_Priority'] = orig_data.apply(
                lambda row: compute_service_and_priority(row)[1], axis=1
            ).astype(int)
            new_data = enhanced_features(new_data)
            combined_data = pd.concat([orig_data, new_data], ignore_index=True)

            # Priority retraining
            X_priority = combined_data[PRIORITY_FEATURES]
            y_priority = combined_data['Priorite'].values - 1  # 0-based labels
            X_priority_imputed = priority_imputer.transform(X_priority)
            X_priority_scaled = priority_scaler.transform(X_priority_imputed)
            with model_lock:
                priority_model.fit(X_priority_scaled, y_priority)

            # Service retraining
            X_service = combined_data[SERVICE_FEATURES]
            y_service = label_encoder_service.transform(combined_data['Service_Suivant'].fillna('Unknown'))
            X_service_imputed = service_imputer.transform(X_service)
            X_service_scaled = service_scaler.transform(X_service_imputed)
            with model_lock:
                service_model.fit(X_service_scaled, y_service)

            timestamp = int(time.time())
            joblib.dump(priority_model, f'priority_model_{timestamp}.pkl')
            joblib.dump(service_model, f'service_model_{timestamp}.pkl')
            # Also refresh the fixed names that the startup code loads.
            joblib.dump(priority_model, 'priority_model.pkl')
            joblib.dump(service_model, 'service_model.pkl')
            new_data.to_csv(f'archive_new_data_{timestamp}.csv', index=False)
            # Remove rather than truncate: an existing empty file makes the
            # next append skip the header row and corrupts the following read.
            os.remove(NEW_DATA_FILE)
            logger.info("Models retrained and saved.")
        except Exception as e:
            logger.error(f"Error in retrain: {e}")
793
+
794
@app.route('/predict', methods=['POST'])
def predict():
    """Predict the triage priority and next service for one patient.

    Expects a JSON object containing every key in ``required_fields``.
    Responds with the predicted priority, the predicted service and both
    model confidences. Each model prediction is replaced by the rule-based
    suggestion when its confidence is below 0.7; the priority is also replaced
    when critical signs are flagged, and pregnant patients are always routed
    to 'Gynécologie/Obstétrique'. Every answered request is appended to
    NEW_DATA_FILE for later retraining.

    Status codes: 400 for a missing/invalid body or field, 503 when the
    models are not loaded, 500 for any other failure.
    """
    global priority_model, service_model, priority_scaler, service_scaler, priority_imputer, service_imputer, label_encoder_service

    def _as_flag(value):
        """Map JSON booleans, numbers and strings like 'false'/'non'/'0' to 0 or 1."""
        if isinstance(value, str):
            return 0 if value.strip().lower() in ('', '0', 'false', 'no', 'non', 'n', 'f') else 1
        return 1 if value else 0

    try:
        # silent=True yields None instead of raising on a missing or malformed body.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        required_fields = ['age', 'sexe', 'enceinte', 'spo2', 'freq_resp', 'pouls', 'ecg', 'pa', 'temperature', 'imc']
        missing_fields = [field for field in required_fields if field not in data]
        if missing_fields:
            return jsonify({'error': f'Missing fields: {", ".join(missing_fields)}'}), 400

        try:
            input_data = {
                'Age': float(data['age']),
                'Sexe': 0 if str(data['sexe']).strip().lower() == 'masculin' else 1,
                # bool('false') is True, so strings are parsed explicitly.
                'Enceinte': _as_flag(data['enceinte']),
                'SpO2': float(data['spo2']),
                'Frquce_Rprtr(rpm)': float(data['freq_resp']),
                'Pouls': float(data['pouls']),
                'ECG': 0 if str(data['ecg']).strip().lower() == 'normal' else 1,
                'PA': float(data['pa']),
                'Temperature': float(data['temperature']),
                'IMC': float(data['imc']),
            }
        except (TypeError, ValueError) as e:
            return jsonify({'error': f'Invalid field value: {e}'}), 400

        if priority_model is None or service_model is None:
            return jsonify({'error': 'Models are not loaded yet'}), 503

        input_df = pd.DataFrame([input_data])
        input_df = enhanced_features(input_df)
        suggested_service, suggested_priority = compute_service_and_priority(input_df.iloc[0])
        input_df['Suggested_Priority'] = suggested_priority

        with model_lock:
            # Priority prediction
            priority_input = input_df[PRIORITY_FEATURES]
            priority_imputed = priority_imputer.transform(priority_input)
            priority_scaled = priority_scaler.transform(priority_imputed)
            priority_probs = priority_model.predict_proba(priority_scaled)[0]
            # predict_proba columns follow classes_, which need not be 0..n-1
            # when a class was absent from the training data.
            priority_pred = int(priority_model.classes_[np.argmax(priority_probs)]) + 1
            priority_conf = float(max(priority_probs))

            # Service prediction
            service_input = input_df[SERVICE_FEATURES]
            service_imputed = service_imputer.transform(service_input)
            service_scaled = service_scaler.transform(service_imputed)
            service_probs = service_model.predict_proba(service_scaled)[0]
            service_pred_idx = service_model.classes_[np.argmax(service_probs)]
            service_pred = label_encoder_service.inverse_transform([service_pred_idx])[0]
            service_conf = float(max(service_probs))

        # Fallback to rule-based logic if confidence is low or critical conditions apply
        if priority_conf < 0.7 or input_df['Critical_Signs'][0] == 1:
            priority_pred = suggested_priority
        if service_conf < 0.7 or input_df['Enceinte'][0] == 1:
            service_pred = suggested_service if input_df['Enceinte'][0] == 0 else 'Gynécologie/Obstétrique'

        input_df['Priorite'] = priority_pred
        input_df['Service_Suivant'] = service_pred
        # Write the header whenever the file is missing or empty, so a file
        # that was truncated elsewhere still gets column names.
        write_header = not os.path.exists(NEW_DATA_FILE) or os.path.getsize(NEW_DATA_FILE) == 0
        input_df.to_csv(NEW_DATA_FILE, mode='a', header=write_header, index=False)

        logger.info(f"Predicted: service={service_pred}, priority={priority_pred}, service_conf={service_conf}, priority_conf={priority_conf}")
        return jsonify({
            'priority': int(priority_pred),
            'service_suivant': service_pred,
            'priority_confidence': priority_conf,
            'service_confidence': service_conf
        })
    except Exception as e:
        logger.error(f"Prediction error: {str(e)}")
        return jsonify({'error': str(e)}), 500
863
+
864
if __name__ == '__main__':
    import glob

    def _latest_artifact(stem):
        """Return '<stem>.pkl' if it exists, else the newest '<stem>_*.pkl', else None."""
        stable = f'{stem}.pkl'
        if os.path.exists(stable):
            return stable
        candidates = glob.glob(f'{stem}_*.pkl')
        return max(candidates, key=os.path.getmtime) if candidates else None

    # Training on startup remains the default; set FORCE_RETRAIN=0 to reuse
    # previously saved artifacts instead.
    FORCE_RETRAIN = os.environ.get('FORCE_RETRAIN', '1').strip().lower() not in ('0', 'false', 'no')
    # The training functions save models under timestamped names, so fall
    # back to the newest of those when the fixed name is absent.
    priority_model_path = _latest_artifact('priority_model')
    service_model_path = _latest_artifact('service_model')

    if FORCE_RETRAIN or not (priority_model_path and service_model_path):
        train_priority_model()
        train_service_model()
    else:
        with model_lock:
            priority_model = joblib.load(priority_model_path)
            service_model = joblib.load(service_model_path)
            priority_scaler = joblib.load('priority_scaler.pkl')
            service_scaler = joblib.load('service_scaler.pkl')
            priority_imputer = joblib.load('priority_imputer.pkl')
            service_imputer = joblib.load('service_imputer.pkl')
            label_encoder_service = joblib.load('label_encoder_service.pkl')

    # Background retraining runs in a daemon thread.
    retrain_thread = threading.Thread(target=retrain_models, daemon=True)
    retrain_thread.start()
881
+ >>>>>>> 12fbcdcf1e034f735bed38d79600e83ccc29f849
882
+ app.run(debug=False, host='0.0.0.0', port=5000)
requirements.txt CHANGED
@@ -10,9 +10,6 @@ imblearn
10
  joblib
11
  tqdm
12
  tenacity
13
- plotly
14
- shinywidgets
15
- shiny
16
- ridgeplot
17
 
18
 
 
10
  joblib
11
  tqdm
12
  tenacity
13
+
 
 
 
14
 
15
 
shared.py DELETED
@@ -1,6 +0,0 @@
1
- from pathlib import Path
2
-
3
- import pandas as pd
4
-
5
- app_dir = Path(__file__).parent
6
- tips = pd.read_csv(app_dir / "tips.csv")
 
 
 
 
 
 
 
styles.css DELETED
@@ -1,12 +0,0 @@
1
- :root {
2
- --bslib-sidebar-main-bg: #f8f8f8;
3
- }
4
-
5
- .popover {
6
- --bs-popover-header-bg: #222;
7
- --bs-popover-header-color: #fff;
8
- }
9
-
10
- .popover .btn-close {
11
- filter: var(--bs-btn-close-white-filter);
12
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
tips.csv DELETED
@@ -1,245 +0,0 @@
1
- total_bill,tip,sex,smoker,day,time,size
2
- 16.99,1.01,Female,No,Sun,Dinner,2
3
- 10.34,1.66,Male,No,Sun,Dinner,3
4
- 21.01,3.5,Male,No,Sun,Dinner,3
5
- 23.68,3.31,Male,No,Sun,Dinner,2
6
- 24.59,3.61,Female,No,Sun,Dinner,4
7
- 25.29,4.71,Male,No,Sun,Dinner,4
8
- 8.77,2.0,Male,No,Sun,Dinner,2
9
- 26.88,3.12,Male,No,Sun,Dinner,4
10
- 15.04,1.96,Male,No,Sun,Dinner,2
11
- 14.78,3.23,Male,No,Sun,Dinner,2
12
- 10.27,1.71,Male,No,Sun,Dinner,2
13
- 35.26,5.0,Female,No,Sun,Dinner,4
14
- 15.42,1.57,Male,No,Sun,Dinner,2
15
- 18.43,3.0,Male,No,Sun,Dinner,4
16
- 14.83,3.02,Female,No,Sun,Dinner,2
17
- 21.58,3.92,Male,No,Sun,Dinner,2
18
- 10.33,1.67,Female,No,Sun,Dinner,3
19
- 16.29,3.71,Male,No,Sun,Dinner,3
20
- 16.97,3.5,Female,No,Sun,Dinner,3
21
- 20.65,3.35,Male,No,Sat,Dinner,3
22
- 17.92,4.08,Male,No,Sat,Dinner,2
23
- 20.29,2.75,Female,No,Sat,Dinner,2
24
- 15.77,2.23,Female,No,Sat,Dinner,2
25
- 39.42,7.58,Male,No,Sat,Dinner,4
26
- 19.82,3.18,Male,No,Sat,Dinner,2
27
- 17.81,2.34,Male,No,Sat,Dinner,4
28
- 13.37,2.0,Male,No,Sat,Dinner,2
29
- 12.69,2.0,Male,No,Sat,Dinner,2
30
- 21.7,4.3,Male,No,Sat,Dinner,2
31
- 19.65,3.0,Female,No,Sat,Dinner,2
32
- 9.55,1.45,Male,No,Sat,Dinner,2
33
- 18.35,2.5,Male,No,Sat,Dinner,4
34
- 15.06,3.0,Female,No,Sat,Dinner,2
35
- 20.69,2.45,Female,No,Sat,Dinner,4
36
- 17.78,3.27,Male,No,Sat,Dinner,2
37
- 24.06,3.6,Male,No,Sat,Dinner,3
38
- 16.31,2.0,Male,No,Sat,Dinner,3
39
- 16.93,3.07,Female,No,Sat,Dinner,3
40
- 18.69,2.31,Male,No,Sat,Dinner,3
41
- 31.27,5.0,Male,No,Sat,Dinner,3
42
- 16.04,2.24,Male,No,Sat,Dinner,3
43
- 17.46,2.54,Male,No,Sun,Dinner,2
44
- 13.94,3.06,Male,No,Sun,Dinner,2
45
- 9.68,1.32,Male,No,Sun,Dinner,2
46
- 30.4,5.6,Male,No,Sun,Dinner,4
47
- 18.29,3.0,Male,No,Sun,Dinner,2
48
- 22.23,5.0,Male,No,Sun,Dinner,2
49
- 32.4,6.0,Male,No,Sun,Dinner,4
50
- 28.55,2.05,Male,No,Sun,Dinner,3
51
- 18.04,3.0,Male,No,Sun,Dinner,2
52
- 12.54,2.5,Male,No,Sun,Dinner,2
53
- 10.29,2.6,Female,No,Sun,Dinner,2
54
- 34.81,5.2,Female,No,Sun,Dinner,4
55
- 9.94,1.56,Male,No,Sun,Dinner,2
56
- 25.56,4.34,Male,No,Sun,Dinner,4
57
- 19.49,3.51,Male,No,Sun,Dinner,2
58
- 38.01,3.0,Male,Yes,Sat,Dinner,4
59
- 26.41,1.5,Female,No,Sat,Dinner,2
60
- 11.24,1.76,Male,Yes,Sat,Dinner,2
61
- 48.27,6.73,Male,No,Sat,Dinner,4
62
- 20.29,3.21,Male,Yes,Sat,Dinner,2
63
- 13.81,2.0,Male,Yes,Sat,Dinner,2
64
- 11.02,1.98,Male,Yes,Sat,Dinner,2
65
- 18.29,3.76,Male,Yes,Sat,Dinner,4
66
- 17.59,2.64,Male,No,Sat,Dinner,3
67
- 20.08,3.15,Male,No,Sat,Dinner,3
68
- 16.45,2.47,Female,No,Sat,Dinner,2
69
- 3.07,1.0,Female,Yes,Sat,Dinner,1
70
- 20.23,2.01,Male,No,Sat,Dinner,2
71
- 15.01,2.09,Male,Yes,Sat,Dinner,2
72
- 12.02,1.97,Male,No,Sat,Dinner,2
73
- 17.07,3.0,Female,No,Sat,Dinner,3
74
- 26.86,3.14,Female,Yes,Sat,Dinner,2
75
- 25.28,5.0,Female,Yes,Sat,Dinner,2
76
- 14.73,2.2,Female,No,Sat,Dinner,2
77
- 10.51,1.25,Male,No,Sat,Dinner,2
78
- 17.92,3.08,Male,Yes,Sat,Dinner,2
79
- 27.2,4.0,Male,No,Thur,Lunch,4
80
- 22.76,3.0,Male,No,Thur,Lunch,2
81
- 17.29,2.71,Male,No,Thur,Lunch,2
82
- 19.44,3.0,Male,Yes,Thur,Lunch,2
83
- 16.66,3.4,Male,No,Thur,Lunch,2
84
- 10.07,1.83,Female,No,Thur,Lunch,1
85
- 32.68,5.0,Male,Yes,Thur,Lunch,2
86
- 15.98,2.03,Male,No,Thur,Lunch,2
87
- 34.83,5.17,Female,No,Thur,Lunch,4
88
- 13.03,2.0,Male,No,Thur,Lunch,2
89
- 18.28,4.0,Male,No,Thur,Lunch,2
90
- 24.71,5.85,Male,No,Thur,Lunch,2
91
- 21.16,3.0,Male,No,Thur,Lunch,2
92
- 28.97,3.0,Male,Yes,Fri,Dinner,2
93
- 22.49,3.5,Male,No,Fri,Dinner,2
94
- 5.75,1.0,Female,Yes,Fri,Dinner,2
95
- 16.32,4.3,Female,Yes,Fri,Dinner,2
96
- 22.75,3.25,Female,No,Fri,Dinner,2
97
- 40.17,4.73,Male,Yes,Fri,Dinner,4
98
- 27.28,4.0,Male,Yes,Fri,Dinner,2
99
- 12.03,1.5,Male,Yes,Fri,Dinner,2
100
- 21.01,3.0,Male,Yes,Fri,Dinner,2
101
- 12.46,1.5,Male,No,Fri,Dinner,2
102
- 11.35,2.5,Female,Yes,Fri,Dinner,2
103
- 15.38,3.0,Female,Yes,Fri,Dinner,2
104
- 44.3,2.5,Female,Yes,Sat,Dinner,3
105
- 22.42,3.48,Female,Yes,Sat,Dinner,2
106
- 20.92,4.08,Female,No,Sat,Dinner,2
107
- 15.36,1.64,Male,Yes,Sat,Dinner,2
108
- 20.49,4.06,Male,Yes,Sat,Dinner,2
109
- 25.21,4.29,Male,Yes,Sat,Dinner,2
110
- 18.24,3.76,Male,No,Sat,Dinner,2
111
- 14.31,4.0,Female,Yes,Sat,Dinner,2
112
- 14.0,3.0,Male,No,Sat,Dinner,2
113
- 7.25,1.0,Female,No,Sat,Dinner,1
114
- 38.07,4.0,Male,No,Sun,Dinner,3
115
- 23.95,2.55,Male,No,Sun,Dinner,2
116
- 25.71,4.0,Female,No,Sun,Dinner,3
117
- 17.31,3.5,Female,No,Sun,Dinner,2
118
- 29.93,5.07,Male,No,Sun,Dinner,4
119
- 10.65,1.5,Female,No,Thur,Lunch,2
120
- 12.43,1.8,Female,No,Thur,Lunch,2
121
- 24.08,2.92,Female,No,Thur,Lunch,4
122
- 11.69,2.31,Male,No,Thur,Lunch,2
123
- 13.42,1.68,Female,No,Thur,Lunch,2
124
- 14.26,2.5,Male,No,Thur,Lunch,2
125
- 15.95,2.0,Male,No,Thur,Lunch,2
126
- 12.48,2.52,Female,No,Thur,Lunch,2
127
- 29.8,4.2,Female,No,Thur,Lunch,6
128
- 8.52,1.48,Male,No,Thur,Lunch,2
129
- 14.52,2.0,Female,No,Thur,Lunch,2
130
- 11.38,2.0,Female,No,Thur,Lunch,2
131
- 22.82,2.18,Male,No,Thur,Lunch,3
132
- 19.08,1.5,Male,No,Thur,Lunch,2
133
- 20.27,2.83,Female,No,Thur,Lunch,2
134
- 11.17,1.5,Female,No,Thur,Lunch,2
135
- 12.26,2.0,Female,No,Thur,Lunch,2
136
- 18.26,3.25,Female,No,Thur,Lunch,2
137
- 8.51,1.25,Female,No,Thur,Lunch,2
138
- 10.33,2.0,Female,No,Thur,Lunch,2
139
- 14.15,2.0,Female,No,Thur,Lunch,2
140
- 16.0,2.0,Male,Yes,Thur,Lunch,2
141
- 13.16,2.75,Female,No,Thur,Lunch,2
142
- 17.47,3.5,Female,No,Thur,Lunch,2
143
- 34.3,6.7,Male,No,Thur,Lunch,6
144
- 41.19,5.0,Male,No,Thur,Lunch,5
145
- 27.05,5.0,Female,No,Thur,Lunch,6
146
- 16.43,2.3,Female,No,Thur,Lunch,2
147
- 8.35,1.5,Female,No,Thur,Lunch,2
148
- 18.64,1.36,Female,No,Thur,Lunch,3
149
- 11.87,1.63,Female,No,Thur,Lunch,2
150
- 9.78,1.73,Male,No,Thur,Lunch,2
151
- 7.51,2.0,Male,No,Thur,Lunch,2
152
- 14.07,2.5,Male,No,Sun,Dinner,2
153
- 13.13,2.0,Male,No,Sun,Dinner,2
154
- 17.26,2.74,Male,No,Sun,Dinner,3
155
- 24.55,2.0,Male,No,Sun,Dinner,4
156
- 19.77,2.0,Male,No,Sun,Dinner,4
157
- 29.85,5.14,Female,No,Sun,Dinner,5
158
- 48.17,5.0,Male,No,Sun,Dinner,6
159
- 25.0,3.75,Female,No,Sun,Dinner,4
160
- 13.39,2.61,Female,No,Sun,Dinner,2
161
- 16.49,2.0,Male,No,Sun,Dinner,4
162
- 21.5,3.5,Male,No,Sun,Dinner,4
163
- 12.66,2.5,Male,No,Sun,Dinner,2
164
- 16.21,2.0,Female,No,Sun,Dinner,3
165
- 13.81,2.0,Male,No,Sun,Dinner,2
166
- 17.51,3.0,Female,Yes,Sun,Dinner,2
167
- 24.52,3.48,Male,No,Sun,Dinner,3
168
- 20.76,2.24,Male,No,Sun,Dinner,2
169
- 31.71,4.5,Male,No,Sun,Dinner,4
170
- 10.59,1.61,Female,Yes,Sat,Dinner,2
171
- 10.63,2.0,Female,Yes,Sat,Dinner,2
172
- 50.81,10.0,Male,Yes,Sat,Dinner,3
173
- 15.81,3.16,Male,Yes,Sat,Dinner,2
174
- 7.25,5.15,Male,Yes,Sun,Dinner,2
175
- 31.85,3.18,Male,Yes,Sun,Dinner,2
176
- 16.82,4.0,Male,Yes,Sun,Dinner,2
177
- 32.9,3.11,Male,Yes,Sun,Dinner,2
178
- 17.89,2.0,Male,Yes,Sun,Dinner,2
179
- 14.48,2.0,Male,Yes,Sun,Dinner,2
180
- 9.6,4.0,Female,Yes,Sun,Dinner,2
181
- 34.63,3.55,Male,Yes,Sun,Dinner,2
182
- 34.65,3.68,Male,Yes,Sun,Dinner,4
183
- 23.33,5.65,Male,Yes,Sun,Dinner,2
184
- 45.35,3.5,Male,Yes,Sun,Dinner,3
185
- 23.17,6.5,Male,Yes,Sun,Dinner,4
186
- 40.55,3.0,Male,Yes,Sun,Dinner,2
187
- 20.69,5.0,Male,No,Sun,Dinner,5
188
- 20.9,3.5,Female,Yes,Sun,Dinner,3
189
- 30.46,2.0,Male,Yes,Sun,Dinner,5
190
- 18.15,3.5,Female,Yes,Sun,Dinner,3
191
- 23.1,4.0,Male,Yes,Sun,Dinner,3
192
- 15.69,1.5,Male,Yes,Sun,Dinner,2
193
- 19.81,4.19,Female,Yes,Thur,Lunch,2
194
- 28.44,2.56,Male,Yes,Thur,Lunch,2
195
- 15.48,2.02,Male,Yes,Thur,Lunch,2
196
- 16.58,4.0,Male,Yes,Thur,Lunch,2
197
- 7.56,1.44,Male,No,Thur,Lunch,2
198
- 10.34,2.0,Male,Yes,Thur,Lunch,2
199
- 43.11,5.0,Female,Yes,Thur,Lunch,4
200
- 13.0,2.0,Female,Yes,Thur,Lunch,2
201
- 13.51,2.0,Male,Yes,Thur,Lunch,2
202
- 18.71,4.0,Male,Yes,Thur,Lunch,3
203
- 12.74,2.01,Female,Yes,Thur,Lunch,2
204
- 13.0,2.0,Female,Yes,Thur,Lunch,2
205
- 16.4,2.5,Female,Yes,Thur,Lunch,2
206
- 20.53,4.0,Male,Yes,Thur,Lunch,4
207
- 16.47,3.23,Female,Yes,Thur,Lunch,3
208
- 26.59,3.41,Male,Yes,Sat,Dinner,3
209
- 38.73,3.0,Male,Yes,Sat,Dinner,4
210
- 24.27,2.03,Male,Yes,Sat,Dinner,2
211
- 12.76,2.23,Female,Yes,Sat,Dinner,2
212
- 30.06,2.0,Male,Yes,Sat,Dinner,3
213
- 25.89,5.16,Male,Yes,Sat,Dinner,4
214
- 48.33,9.0,Male,No,Sat,Dinner,4
215
- 13.27,2.5,Female,Yes,Sat,Dinner,2
216
- 28.17,6.5,Female,Yes,Sat,Dinner,3
217
- 12.9,1.1,Female,Yes,Sat,Dinner,2
218
- 28.15,3.0,Male,Yes,Sat,Dinner,5
219
- 11.59,1.5,Male,Yes,Sat,Dinner,2
220
- 7.74,1.44,Male,Yes,Sat,Dinner,2
221
- 30.14,3.09,Female,Yes,Sat,Dinner,4
222
- 12.16,2.2,Male,Yes,Fri,Lunch,2
223
- 13.42,3.48,Female,Yes,Fri,Lunch,2
224
- 8.58,1.92,Male,Yes,Fri,Lunch,1
225
- 15.98,3.0,Female,No,Fri,Lunch,3
226
- 13.42,1.58,Male,Yes,Fri,Lunch,2
227
- 16.27,2.5,Female,Yes,Fri,Lunch,2
228
- 10.09,2.0,Female,Yes,Fri,Lunch,2
229
- 20.45,3.0,Male,No,Sat,Dinner,4
230
- 13.28,2.72,Male,No,Sat,Dinner,2
231
- 22.12,2.88,Female,Yes,Sat,Dinner,2
232
- 24.01,2.0,Male,Yes,Sat,Dinner,4
233
- 15.69,3.0,Male,Yes,Sat,Dinner,3
234
- 11.61,3.39,Male,No,Sat,Dinner,2
235
- 10.77,1.47,Male,No,Sat,Dinner,2
236
- 15.53,3.0,Male,Yes,Sat,Dinner,2
237
- 10.07,1.25,Male,No,Sat,Dinner,2
238
- 12.6,1.0,Male,Yes,Sat,Dinner,2
239
- 32.83,1.17,Male,Yes,Sat,Dinner,2
240
- 35.83,4.67,Female,No,Sat,Dinner,3
241
- 29.03,5.92,Male,No,Sat,Dinner,3
242
- 27.18,2.0,Female,Yes,Sat,Dinner,2
243
- 22.67,2.0,Male,Yes,Sat,Dinner,2
244
- 17.82,1.75,Male,No,Sat,Dinner,2
245
- 18.78,3.0,Female,No,Thur,Dinner,2