Dama12 commited on
Commit
70d16f2
·
1 Parent(s): 4282d4c

Déploiement de l'API Flask sur Hugging Face

Browse files
Files changed (4) hide show
  1. Dockerfile +12 -8
  2. README.md +19 -16
  3. allinone.py +440 -0
  4. requirements.txt +10 -5
Dockerfile CHANGED
@@ -1,13 +1,17 @@
1
- FROM python:3.12
 
2
 
3
- WORKDIR /code
 
4
 
5
- COPY ./requirements.txt /code/requirements.txt
 
6
 
7
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
8
 
9
- COPY . .
 
10
 
11
- EXPOSE 7860
12
-
13
- CMD ["shiny", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
 
# Use an official slim Python base image
FROM python:3.9-slim

# Set the working directory inside the container
WORKDIR /app

# Copy only the dependency manifest first so the expensive pip install
# layer is cached and re-run only when requirements.txt changes,
# not on every source-code edit.
COPY requirements.txt /app/

# Install the dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code into the container
COPY . /app

# Expose the port the application listens on
# NOTE(review): Hugging Face Spaces routes traffic to port 7860 by default;
# confirm app_port is set to 5000 in the Space config, or switch to 7860.
EXPOSE 5000

# Start the application
CMD ["python", "allinone.py"]
 
README.md CHANGED
@@ -1,21 +1,24 @@
1
- ---
2
- title: Priority Prediction
3
- emoji: 🌍
4
- colorFrom: yellow
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- short_description: Ce projet implémente un modèle de machine learning pour préd
10
- ---
11
 
12
- This is a templated Space for [Shiny for Python](https://shiny.rstudio.com/py/).
13
 
 
14
 
15
- To get started with a new app do the following:
 
 
 
 
 
 
 
 
 
 
16
 
17
- 1) Install Shiny with `pip install shiny`
18
- 2) Create a new app with `shiny create`
19
- 3) Then run the app with `shiny run --reload`
20
 
21
- To learn more about this framework please see the [Documentation](https://shiny.rstudio.com/py/docs/overview.html).
 
 
 
 
1
+ # Prédiction de Priorité et Services Médicaux
 
 
 
 
 
 
 
 
 
2
 
3
+ Ce projet implémente un modèle de machine learning pour prédire la priorité des patients et les services médicaux recommandés en fonction de leurs caractéristiques.
4
 
5
+ ## Dépendances
6
 
7
+ - Flask
8
+ - flask_cors
9
+ - pandas
10
+ - numpy
11
+ - scikit-learn
12
+ - xgboost
13
+ - lightgbm
14
+ - imblearn
15
+ - joblib
16
+ - tqdm
17
+ - tenacity
18
 
19
+ ## Installation
 
 
20
 
21
+ 1. Clonez ce repository :
22
+ ```bash
23
+ git clone <url-du-repository>
24
+ cd <nom-du-dossier>
allinone.py ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from xgboost import XGBClassifier
4
+ from lightgbm import LGBMClassifier
5
+ from sklearn.ensemble import RandomForestClassifier
6
+ from sklearn.linear_model import LogisticRegression
7
+ from sklearn.svm import SVC
8
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
9
+ from sklearn.model_selection import StratifiedKFold
10
+ from sklearn.metrics import classification_report, recall_score, f1_score
11
+ from sklearn.impute import SimpleImputer
12
+ from imblearn.over_sampling import SMOTE
13
+ from imblearn.under_sampling import RandomUnderSampler
14
+ from imblearn.pipeline import Pipeline
15
+ import joblib
16
+ from flask import Flask, request, jsonify
17
+ from flask_cors import CORS
18
+ import os
19
+ import warnings
20
+ import time
21
+ from tqdm import tqdm
22
+ import threading
23
+ import logging
24
+ from tenacity import retry, wait_fixed, stop_after_attempt
# Silence noisy library UserWarnings and cap joblib/loky worker detection.
warnings.filterwarnings('ignore', category=UserWarning)
os.environ["LOKY_MAX_CPU_COUNT"] = "1"

# Module-level logger used throughout training and serving.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Flask application with CORS enabled for all origins.
app = Flask(__name__)
CORS(app)

# CSV queue where /predict appends pseudo-labelled rows for later retraining.
NEW_DATA_FILE = 'new_data.csv'
# Main training dataset, expected next to this script.
DATASET_PATH = "my_datasheet_80000.csv"
# Minimum queued rows before the hourly background retrain runs.
MIN_NEW_SAMPLES_FOR_RETRAIN = 100

# Feature sets for each task
# (columns produced by enhanced_features / compute_service_and_priority).
PRIORITY_FEATURES = [
    'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'PA', 'Temperature', 'SpO2_Severity', 'Tachypnea', 'Bradypnea',
    'Tachycardia', 'Bradycardia', 'Critical_Signs', 'SpO2_Temp_Ratio', 'Pouls_PA_Ratio', 'Temp_Pouls_Ratio',
    'SpO2_PA_Diff', 'SpO2_Temp_Diff', 'PA_Pouls_Diff', 'SpO2_Log', 'Temp_Squared', 'Suggested_Priority'
]

SERVICE_FEATURES = [
    'Age', 'Sexe', 'Enceinte', 'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'ECG', 'PA', 'Temperature', 'IMC',
    'Age_Category', 'Temp_Anomaly', 'PA_High', 'PA_Low', 'Pouls_SpO2_Ratio', 'PA_Temp_Ratio', 'IMC_Temp_Ratio'
]

# Fitted artefacts, populated by the train_* functions or loaded from disk
# in the __main__ block.
priority_model = None
service_model = None
priority_scaler = None
service_scaler = None
priority_imputer = None
service_imputer = None
label_encoder_service = LabelEncoder()

# Guards concurrent access to the model globals (Flask request threads vs
# the background retraining thread).
model_lock = threading.Lock()
def enhanced_features(df):
    """Add engineered vital-sign features to *df* (mutated in place, also returned).

    Requires numeric columns 'Age', 'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls',
    'PA', 'Temperature' and 'IMC'.
    """
    # Age-dependent respiratory-rate flags (infant / child / adult thresholds).
    df['Tachypnea'] = df.apply(lambda row: 1 if (row['Age'] < 1 and row['Frquce_Rprtr(rpm)'] > 40) or
                                                (row['Age'] < 12 and row['Frquce_Rprtr(rpm)'] > 30) or
                                                (row['Age'] >= 12 and row['Frquce_Rprtr(rpm)'] > 20) else 0, axis=1)
    df['Bradypnea'] = df.apply(lambda row: 1 if (row['Age'] < 1 and row['Frquce_Rprtr(rpm)'] < 20) or
                                                (row['Age'] < 12 and row['Frquce_Rprtr(rpm)'] < 12) or
                                                (row['Age'] >= 12 and row['Frquce_Rprtr(rpm)'] < 8) else 0, axis=1)
    # Age-dependent pulse flags.
    df['Tachycardia'] = df.apply(lambda row: 1 if (row['Age'] < 1 and row['Pouls'] > 160) or
                                                  (row['Age'] < 12 and row['Pouls'] > 120) or
                                                  (row['Age'] >= 12 and row['Pouls'] > 100) else 0, axis=1)
    df['Bradycardia'] = df.apply(lambda row: 1 if (row['Age'] < 1 and row['Pouls'] < 90) or
                                                  (row['Age'] < 12 and row['Pouls'] < 70) or
                                                  (row['Age'] >= 12 and row['Pouls'] < 50) else 0, axis=1)
    # Interaction ratios and differences; the +1e-6 guards against division by zero.
    df['SpO2_Temp_Ratio'] = df['SpO2'] / (df['Temperature'] + 1e-6)
    df['Pouls_PA_Ratio'] = df['Pouls'] / (df['PA'] + 1e-6)
    df['Temp_Pouls_Ratio'] = df['Temperature'] / (df['Pouls'] + 1e-6)
    df['SpO2_PA_Diff'] = df['SpO2'] - df['PA'] / 10
    df['SpO2_Temp_Diff'] = df['SpO2'] - df['Temperature']
    df['PA_Pouls_Diff'] = df['PA'] - df['Pouls']
    df['IMC_Temp_Ratio'] = df['IMC'] / (df['Temperature'] + 1e-6)
    df['SpO2_Log'] = np.log1p(df['SpO2'])
    df['Temp_Squared'] = df['Temperature'] ** 2
    df['Pouls_SpO2_Ratio'] = df['Pouls'] / (df['SpO2'] + 1e-6)
    df['PA_Temp_Ratio'] = df['PA'] / (df['Temperature'] + 1e-6)
    # Binned / indicator features. pd.cut yields NaN outside the bin edges;
    # those NaNs are later filled by the median imputers — TODO confirm intended.
    df['Age_Category'] = pd.cut(df['Age'], bins=[0, 1, 12, 45, 65, 120], labels=[0, 1, 2, 3, 4])
    df['Temp_Anomaly'] = df['Temperature'].apply(lambda x: 1 if x < 35 or x > 38 else 0)
    df['PA_High'] = df['PA'].apply(lambda x: 1 if x > 160 else 0)
    df['PA_Low'] = df['PA'].apply(lambda x: 1 if x < 90 else 0)
    df['SpO2_Severity'] = pd.cut(df['SpO2'], bins=[0, 85, 90, 92, 100], labels=[3, 2, 1, 0])
    # Flag set when any single vital sign is in the critical range.
    df['Critical_Signs'] = ((df['SpO2'] < 85) | (df['Pouls'] > 150) | (df['Temperature'] > 40) |
                            (df['PA'] > 200) | (df['PA'] < 70)).astype(int)
    return df
def compute_service_and_priority(row):
    """Apply the rule-based triage logic to a single patient record.

    Parameters
    ----------
    row : mapping exposing the raw fields 'Age', 'SpO2', 'Frquce_Rprtr(rpm)',
        'Pouls', 'ECG', 'PA', 'Temperature', 'Enceinte' and 'IMC'.

    Returns
    -------
    tuple(str, int)
        (recommended service name, priority level: 1 = most critical, 5 = routine)
    """
    age = row['Age']
    oxygen = row['SpO2']
    resp = row['Frquce_Rprtr(rpm)']
    pulse = row['Pouls']
    ecg_flag = row['ECG']
    bp = row['PA']
    temp = row['Temperature']
    pregnant = row['Enceinte']
    bmi = row['IMC']

    # --- Service routing: the first matching rule wins. ---
    if age <= 18:
        service = 'Pédiatriques'
    elif pregnant:
        service = 'Gynécologie/Obstétrique'
    elif ecg_flag == 1 or (pulse < 50 or pulse > 110) or (resp > 20):
        service = 'Neurologie'
    elif oxygen < 92 or resp > 18 or pulse > 100 or bp < 90 or bp > 160:
        service = 'Cardiorespiratoire'
    elif (bmi > 30 and (temp > 38 and temp <= 40) and 70 <= pulse <= 90) or \
         (70 <= pulse <= 90 and 110 <= bp <= 130 and oxygen >= 97 and temp <= 37.5):
        service = 'Médecine générale'
    elif temp > 40:
        service = 'Radiothérapie'
    else:
        service = 'Chirurgie'

    # --- Priority: scan severity tiers from most to least critical. ---
    tiers = (
        (1, oxygen < 85 or temp > 40 or pulse > 150 or bp < 70 or bp > 200),
        (2, oxygen < 88 or temp > 39.5 or pulse > 130 or bp < 80 or bp > 180 or resp > 25),
        (3, oxygen < 90 or temp > 38.5 or pulse > 110 or bp < 90 or bp > 160 or resp > 20),
        (4, oxygen < 92 or temp > 38 or pulse > 100 or bp < 100 or bp > 140 or resp > 18),
    )
    priorite = 5
    for level, triggered in tiers:
        if triggered:
            priorite = level
            break

    return service, priorite
def get_smote_strategy(y, max_samples=1000):
    """Build the SMOTE ``sampling_strategy`` dict for label vector *y*.

    Each class is targeted at ``min(max_samples, max(2 * count, 100))``,
    clamped to never fall below its current count — SMOTE can only grow a
    class, and a target below the current count raises a ValueError.

    Parameters
    ----------
    y : array-like of class labels.
    max_samples : int, upper bound on any class size after oversampling.

    Returns
    -------
    dict mapping class label -> target sample count.
    """
    class_counts = pd.Series(y).value_counts()
    strategy = {}
    for cls, count in class_counts.items():
        target = min(max_samples, max(count * 2, 100))  # Ensure reasonable class sizes
        # Bug fix: the computed target was previously discarded, so the
        # function always returned an empty dict and SMOTE never oversampled.
        strategy[cls] = max(count, target)
    return strategy
def train_priority_model():
    """Train, cross-validate and persist the best priority (1-5) classifier.

    Reads DATASET_PATH, engineers features, compares five candidate models
    with 5-fold stratified CV (resampling each training fold), refits the
    winner on the full dataset and saves the model, scaler and imputer to
    disk. Updates the module-level globals. Raises on any failure.
    """
    global priority_model, priority_scaler, priority_imputer
    try:
        # Load the raw dataset and encode categorical columns to numeric codes.
        data = pd.read_csv(DATASET_PATH)
        data['Sexe'] = data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
        data['Enceinte'] = data['Enceinte'].astype(int)
        data['ECG'] = data['ECG'].map({'Normal': 0, 'Anormal': 1})
        data = enhanced_features(data)
        # The rule-based suggestion becomes a model feature
        # (Suggested_Priority is listed in PRIORITY_FEATURES).
        data[['Suggested_Service', 'Suggested_Priority']] = data.apply(compute_service_and_priority, axis=1, result_type='expand')
        data['Suggested_Priority'] = data['Suggested_Priority'].astype(int)

        X = data[PRIORITY_FEATURES]
        y = data['Priorite'].values - 1  # Shift to 0-based indexing

        # Median imputation then standardization; both fitted here and reused
        # at prediction time through the module-level globals.
        priority_imputer = SimpleImputer(strategy='median')
        X_imputed = priority_imputer.fit_transform(X)
        priority_scaler = StandardScaler()
        X_scaled = priority_scaler.fit_transform(X_imputed)

        # Candidate models compared by cross-validation below.
        models = {
            'XGBoost': XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05, n_jobs=-1, random_state=42),
            'LightGBM': LGBMClassifier(n_estimators=100, max_depth=2, learning_rate=0.05, min_child_samples=5,
                                       reg_alpha=0.5, reg_lambda=0.5, n_jobs=-1, random_state=42, verbose=-1),
            'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=8, n_jobs=-1, random_state=42),
            'LogisticRegression': LogisticRegression(max_iter=1000, multi_class='multinomial', random_state=42),
            'SVM': SVC(probability=True, random_state=42)
        }

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        results = {}

        for name, model in models.items():
            logger.info(f"\nEvaluating {name} for Priority...")
            scores = {'f1': [], 'recall_p1': [], 'time': []}
            for train_idx, test_idx in tqdm(skf.split(X_scaled, y), total=5):
                X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
                y_train, y_test = y[train_idx], y[test_idx]

                # SMOTE requires k_neighbors < smallest class size in the fold.
                min_class_size = pd.Series(y_train).value_counts().min()
                k_neighbors = min(5, max(1, min_class_size - 1))
                # Undersample the majority class, then oversample the rest;
                # fitted on the training fold only (no test leakage).
                pipeline = Pipeline([
                    ('under', RandomUnderSampler(sampling_strategy='majority', random_state=42)),
                    ('over', SMOTE(sampling_strategy=get_smote_strategy(y_train), random_state=42, k_neighbors=k_neighbors))
                ])
                X_train_res, y_train_res = pipeline.fit_resample(X_train, y_train)
                class_sizes = pd.Series(y_train_res).value_counts().to_dict()
                logger.info(f"{name} - Resampled class sizes: {class_sizes}")

                start_time = time.time()
                model.fit(X_train_res, y_train_res)
                train_time = time.time() - start_time

                y_pred = model.predict(X_test)
                scores['f1'].append(f1_score(y_test, y_pred, average='macro'))
                # Recall on class 0 (priority 1) — the critical patients.
                scores['recall_p1'].append(recall_score(y_test, y_pred, labels=[0], average=None, zero_division=0)[0])
                scores['time'].append(train_time)
                logger.info(f"{name} Fold - F1: {scores['f1'][-1]:.3f}, Recall P1: {scores['recall_p1'][-1]:.3f}")

            results[name] = {
                'f1': np.mean(scores['f1']),
                'recall_p1': np.mean(scores['recall_p1']),
                'time': np.mean(scores['time'])
            }
            if name == 'LightGBM':
                feature_importance = pd.Series(model.feature_importances_, index=PRIORITY_FEATURES).sort_values(ascending=False)
                logger.info(f"LightGBM Priority Feature Importance:\n{feature_importance}")

        logger.info("\nPriority Model Comparison:")
        for name, res in results.items():
            logger.info(f"{name}: F1={res['f1']:.3f}, Recall P1={res['recall_p1']:.3f}, Time={res['time']:.2f}s")

        # Selection favours overall F1 plus recall on the critical class.
        best_model = max(results, key=lambda k: results[k]['f1'] + results[k]['recall_p1'])
        logger.info(f"Best Priority Model: {best_model}")

        with model_lock:
            # Refit the winner on the full dataset before publishing it.
            priority_model = models[best_model]
            priority_model.fit(X_scaled, y)

        # Persist artefacts; the model file is timestamped, the
        # scaler/imputer files are overwritten in place.
        timestamp = int(time.time())
        joblib.dump(priority_model, f'priority_model_{timestamp}.pkl')
        joblib.dump(priority_scaler, 'priority_scaler.pkl')
        joblib.dump(priority_imputer, 'priority_imputer.pkl')
        logger.info("Priority model saved.")
    except Exception as e:
        logger.error(f"Error in priority training: {e}")
        raise
def train_service_model():
    """Train, cross-validate and persist the best service classifier.

    Mirrors train_priority_model but predicts the 'Service_Suivant' string
    label (encoded with the module-level label_encoder_service) and selects
    the winner on macro-F1 only. Updates the module-level globals and saves
    the model, scaler, imputer and label encoder. Raises on any failure.
    """
    global service_model, service_scaler, service_imputer, label_encoder_service
    try:
        # Load the raw dataset and encode categorical columns to numeric codes.
        data = pd.read_csv(DATASET_PATH)
        data['Sexe'] = data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
        data['Enceinte'] = data['Enceinte'].astype(int)
        data['ECG'] = data['ECG'].map({'Normal': 0, 'Anormal': 1})
        data = enhanced_features(data)
        data[['Suggested_Service', 'Suggested_Priority']] = data.apply(compute_service_and_priority, axis=1, result_type='expand')

        X = data[SERVICE_FEATURES]
        # Missing service labels become an explicit 'Unknown' class.
        y = label_encoder_service.fit_transform(data['Service_Suivant'].fillna('Unknown'))

        service_imputer = SimpleImputer(strategy='median')
        X_imputed = service_imputer.fit_transform(X)
        service_scaler = StandardScaler()
        X_scaled = service_scaler.fit_transform(X_imputed)

        # Same candidate pool as the priority task.
        models = {
            'XGBoost': XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05, n_jobs=-1, random_state=42),
            'LightGBM': LGBMClassifier(n_estimators=100, max_depth=2, learning_rate=0.05, min_child_samples=5,
                                       reg_alpha=0.5, reg_lambda=0.5, n_jobs=-1, random_state=42, verbose=-1),
            'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=8, n_jobs=-1, random_state=42),
            'LogisticRegression': LogisticRegression(max_iter=1000, multi_class='multinomial', random_state=42),
            'SVM': SVC(probability=True, random_state=42)
        }

        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        results = {}

        for name, model in models.items():
            logger.info(f"\nEvaluating {name} for Service...")
            scores = {'f1': [], 'time': []}
            for train_idx, test_idx in tqdm(skf.split(X_scaled, y), total=5):
                X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
                y_train, y_test = y[train_idx], y[test_idx]

                # SMOTE requires k_neighbors < smallest class size in the fold.
                min_class_size = pd.Series(y_train).value_counts().min()
                k_neighbors = min(5, max(1, min_class_size - 1))
                pipeline = Pipeline([
                    ('under', RandomUnderSampler(sampling_strategy='majority', random_state=42)),
                    ('over', SMOTE(sampling_strategy=get_smote_strategy(y_train), random_state=42, k_neighbors=k_neighbors))
                ])
                X_train_res, y_train_res = pipeline.fit_resample(X_train, y_train)
                class_sizes = pd.Series(y_train_res).value_counts().to_dict()
                logger.info(f"{name} - Resampled class sizes: {class_sizes}")

                start_time = time.time()
                model.fit(X_train_res, y_train_res)
                train_time = time.time() - start_time

                y_pred = model.predict(X_test)
                scores['f1'].append(f1_score(y_test, y_pred, average='macro'))
                scores['time'].append(train_time)

            results[name] = {
                'f1': np.mean(scores['f1']),
                'time': np.mean(scores['time'])
            }
            if name == 'LightGBM':
                feature_importance = pd.Series(model.feature_importances_, index=SERVICE_FEATURES).sort_values(ascending=False)
                logger.info(f"LightGBM Service Feature Importance:\n{feature_importance}")

        logger.info("\nService Model Comparison:")
        for name, res in results.items():
            logger.info(f"{name}: F1={res['f1']:.3f}, Time={res['time']:.2f}s")

        best_model = max(results, key=lambda k: results[k]['f1'])
        logger.info(f"Best Service Model: {best_model}")

        with model_lock:
            # Refit the winner on the full dataset before publishing it.
            service_model = models[best_model]
            service_model.fit(X_scaled, y)

        timestamp = int(time.time())
        joblib.dump(service_model, f'service_model_{timestamp}.pkl')
        joblib.dump(service_scaler, 'service_scaler.pkl')
        joblib.dump(service_imputer, 'service_imputer.pkl')
        joblib.dump(label_encoder_service, 'label_encoder_service.pkl')
        logger.info("Service model saved.")
    except Exception as e:
        logger.error(f"Error in service training: {e}")
        raise
@retry(wait=wait_fixed(2), stop=stop_after_attempt(3))
def retrain_models():
    """Hourly background loop that refits both models on original + queued data.

    Runs forever in a daemon thread. The @retry decorator re-enters the loop
    (up to 3 attempts, 2 s apart) only if an exception escapes it; exceptions
    inside the inner try are caught and logged instead.
    """
    global priority_model, service_model, priority_scaler, service_scaler, priority_imputer, service_imputer, label_encoder_service
    while True:
        time.sleep(3600)  # check the queue once per hour
        if os.path.exists(NEW_DATA_FILE) and os.path.getsize(NEW_DATA_FILE) > 0:
            try:
                new_data = pd.read_csv(NEW_DATA_FILE)
                if len(new_data) >= MIN_NEW_SAMPLES_FOR_RETRAIN:
                    orig_data = pd.read_csv(DATASET_PATH)
                    orig_data['Sexe'] = orig_data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
                    orig_data['Enceinte'] = orig_data['Enceinte'].astype(int)
                    orig_data['ECG'] = orig_data['ECG'].map({'Normal': 0, 'Anormal': 1})
                    # Rows in new_data were written by /predict and are already numeric.
                    new_data = enhanced_features(new_data)
                    # NOTE(review): orig_data is NOT passed through enhanced_features
                    # here, so its engineered columns are NaN after the concat and
                    # get median-imputed below — confirm this is intended.
                    combined_data = pd.concat([orig_data, new_data], ignore_index=True)

                    # Priority retraining
                    X_priority = combined_data[PRIORITY_FEATURES]
                    y_priority = combined_data['Priorite'].values - 1
                    # Reuse the imputer/scaler fitted at initial training time.
                    X_priority_imputed = priority_imputer.transform(X_priority)
                    X_priority_scaled = priority_scaler.transform(X_priority_imputed)
                    with model_lock:
                        priority_model.fit(X_priority_scaled, y_priority)

                    # Service retraining
                    X_service = combined_data[SERVICE_FEATURES]
                    y_service = label_encoder_service.transform(combined_data['Service_Suivant'].fillna('Unknown'))
                    X_service_imputed = service_imputer.transform(X_service)
                    X_service_scaled = service_scaler.transform(X_service_imputed)
                    with model_lock:
                        service_model.fit(X_service_scaled, y_service)

                    # Persist refreshed models, archive the consumed queue,
                    # then truncate it.
                    timestamp = int(time.time())
                    joblib.dump(priority_model, f'priority_model_{timestamp}.pkl')
                    joblib.dump(service_model, f'service_model_{timestamp}.pkl')
                    new_data.to_csv(f'archive_new_data_{timestamp}.csv', index=False)
                    open(NEW_DATA_FILE, 'w').close()  # empty the queue file
                    logger.info("Models retrained and saved.")
            except Exception as e:
                logger.error(f"Error in retrain: {e}")
@app.route('/predict', methods=['POST'])
def predict():
    """Flask endpoint: predict triage priority and target service for one patient.

    Expects a JSON body with keys: age, sexe, enceinte, spo2, freq_resp,
    pouls, ecg, pa, temperature, imc. Returns JSON with 'priority' (1-5),
    'service_suivant' and both confidences; 400 on missing fields, 500 on
    any other failure.
    """
    global priority_model, service_model, priority_scaler, service_scaler, priority_imputer, service_imputer, label_encoder_service
    try:
        data = request.get_json()
        required_fields = ['age', 'sexe', 'enceinte', 'spo2', 'freq_resp', 'pouls', 'ecg', 'pa', 'temperature', 'imc']
        missing_fields = [field for field in required_fields if field not in data]
        if missing_fields:
            return jsonify({'error': f'Missing fields: {", ".join(missing_fields)}'}), 400

        # Normalise the payload into the column names used at training time.
        input_data = {
            'Age': float(data['age']),
            'Sexe': 0 if data['sexe'].lower() == 'masculin' else 1,
            'Enceinte': 1 if bool(data['enceinte']) else 0,
            'SpO2': float(data['spo2']),
            'Frquce_Rprtr(rpm)': float(data['freq_resp']),
            'Pouls': float(data['pouls']),
            'ECG': 0 if data['ecg'].lower() == 'normal' else 1,
            'PA': float(data['pa']),
            'Temperature': float(data['temperature']),
            'IMC': float(data['imc']),
        }

        input_df = pd.DataFrame([input_data])
        input_df = enhanced_features(input_df)
        # The rule-based result doubles as a model feature and as a fallback.
        suggested_service, suggested_priority = compute_service_and_priority(input_df.iloc[0])
        input_df['Suggested_Priority'] = suggested_priority

        with model_lock:
            # Priority prediction
            priority_input = input_df[PRIORITY_FEATURES]
            priority_imputed = priority_imputer.transform(priority_input)
            priority_scaled = priority_scaler.transform(priority_imputed)
            priority_probs = priority_model.predict_proba(priority_scaled)[0]
            priority_pred = np.argmax(priority_probs) + 1  # back to 1-based priority
            priority_conf = float(max(priority_probs))

            # Service prediction
            service_input = input_df[SERVICE_FEATURES]
            service_imputed = service_imputer.transform(service_input)
            service_scaled = service_scaler.transform(service_imputed)
            service_probs = service_model.predict_proba(service_scaled)[0]
            service_pred_idx = np.argmax(service_probs)
            service_pred = label_encoder_service.inverse_transform([service_pred_idx])[0]
            service_conf = float(max(service_probs))

        # Fallback to rule-based logic if confidence is low or critical conditions apply
        if priority_conf < 0.7 or input_df['Critical_Signs'][0] == 1:
            priority_pred = suggested_priority
        if service_conf < 0.7 or input_df['Enceinte'][0] == 1:
            service_pred = suggested_service if input_df['Enceinte'][0] == 0 else 'Gynécologie/Obstétrique'

        # Append the prediction (with its own outputs as labels) to the
        # retraining queue.
        # NOTE(review): this feeds model predictions back in as training
        # labels — a self-training loop; confirm this is intended.
        input_df['Priorite'] = priority_pred
        input_df['Service_Suivant'] = service_pred
        if not os.path.exists(NEW_DATA_FILE):
            input_df.to_csv(NEW_DATA_FILE, index=False)
        else:
            input_df.to_csv(NEW_DATA_FILE, mode='a', header=False, index=False)

        logger.info(f"Predicted: service={service_pred}, priority={priority_pred}, service_conf={service_conf}, priority_conf={priority_conf}")
        return jsonify({
            'priority': int(priority_pred),
            'service_suivant': service_pred,
            'priority_confidence': priority_conf,
            'service_confidence': service_conf
        })
    except Exception as e:
        logger.error(f"Prediction error: {str(e)}")
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # FORCE_RETRAIN=True makes every startup retrain from scratch, which can
    # take a long time before the API begins serving requests.
    FORCE_RETRAIN = True
    if FORCE_RETRAIN or not (os.path.exists('priority_model.pkl') and os.path.exists('service_model.pkl')):
        train_priority_model()
        train_service_model()
    else:
        # NOTE(review): training saves timestamped files such as
        # priority_model_{ts}.pkl, so the un-suffixed names checked above are
        # never produced by this script — confirm this branch is reachable.
        with model_lock:
            priority_model = joblib.load('priority_model.pkl')
            service_model = joblib.load('service_model.pkl')
            priority_scaler = joblib.load('priority_scaler.pkl')
            service_scaler = joblib.load('service_scaler.pkl')
            priority_imputer = joblib.load('priority_imputer.pkl')
            service_imputer = joblib.load('service_imputer.pkl')
            label_encoder_service = joblib.load('label_encoder_service.pkl')

    # Hourly background retraining; the daemon thread dies with the process.
    retrain_thread = threading.Thread(target=retrain_models, daemon=True)
    retrain_thread.start()
    app.run(debug=False, host='0.0.0.0', port=5000)
requirements.txt CHANGED
@@ -1,6 +1,11 @@
1
- faicons
2
- shiny
3
- shinywidgets
4
- plotly
5
  pandas
6
- ridgeplot
 
 
 
 
 
 
 
 
1
+ flask
2
+ flask-cors
 
 
3
  pandas
4
+ numpy
5
+ scikit-learn
6
+ xgboost
7
+ lightgbm
8
+ imbalanced-learn
9
+ joblib
10
+ tqdm
11
+ tenacity