Spaces:

Dama12
/

priority_prediction

Runtime error

App Files Files Community

Dama12 committed on Mar 19, 2025

Commit

70d16f2

1 Parent(s): 4282d4c

Déploiement de l'API Flask sur Hugging Face

Browse files

Files changed (4) hide show

Dockerfile +12 -8
README.md +19 -16
allinone.py +440 -0
requirements.txt +10 -5

Dockerfile CHANGED Viewed

@@ -1,13 +1,17 @@
-FROM python:3.12
-WORKDIR /code
-COPY ./requirements.txt /code/requirements.txt
-RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
-COPY . .
-EXPOSE 7860
-CMD ["shiny", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]

+# Use an official slim Python base image
+FROM python:3.9-slim
+# LightGBM loads the OpenMP runtime (libgomp.so.1) at import time; the slim image does not ship it
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+# Set the working directory inside the container
+WORKDIR /app
+# Install the dependencies first so this layer stays cached until requirements.txt changes
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application code into the container
+COPY . /app
+# Expose the port the application listens on
+EXPOSE 5000
+# Start the application
+CMD ["python", "allinone.py"]

README.md CHANGED Viewed

@@ -1,21 +1,24 @@
----
-title: Priority Prediction
-emoji: 🌍
-colorFrom: yellow
-colorTo: indigo
-sdk: docker
-pinned: false
-license: mit
-short_description: Ce projet implémente un modèle de machine learning pour préd
----
-This is a templated Space for [Shiny for Python](https://shiny.rstudio.com/py/).
-To get started with a new app do the following:
-1) Install Shiny with `pip install shiny`
-2) Create a new app with `shiny create`
-3) Then run the app with `shiny run --reload`
-To learn more about this framework please see the [Documentation](https://shiny.rstudio.com/py/docs/overview.html).

+# Prédiction de Priorité et Services Médicaux
+Ce projet implémente un modèle de machine learning pour prédire la priorité des patients et les services médicaux recommandés en fonction de leurs caractéristiques.
+## Dépendances
+- Flask
+- flask_cors
+- pandas
+- numpy
+- scikit-learn
+- xgboost
+- lightgbm
+- imblearn
+- joblib
+- tqdm
+- tenacity
+## Installation
+1. Clonez ce repository :
+   ```bash
+   git clone <url-du-repository>
+   cd <nom-du-dossier>
+   ```

allinone.py ADDED Viewed

	@@ -0,0 +1,440 @@

+import pandas as pd
+import numpy as np
+from xgboost import XGBClassifier
+from lightgbm import LGBMClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import SVC
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from sklearn.model_selection import StratifiedKFold
+from sklearn.metrics import classification_report, recall_score, f1_score
+from sklearn.impute import SimpleImputer
+from imblearn.over_sampling import SMOTE
+from imblearn.under_sampling import RandomUnderSampler
+from imblearn.pipeline import Pipeline
+import joblib
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import os
+import warnings
+import time
+from tqdm import tqdm
+import threading
+import logging
+from tenacity import retry, wait_fixed, stop_after_attempt
+# --- Process-wide setup -------------------------------------------------
+warnings.filterwarnings('ignore', category=UserWarning)
+os.environ["LOKY_MAX_CPU_COUNT"] = "1"
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# --- Flask application with CORS enabled ---------------------------------
+app = Flask(__name__)
+CORS(app)
+# --- Data files and retraining threshold ---------------------------------
+NEW_DATA_FILE = 'new_data.csv'
+DATASET_PATH = "my_datasheet_80000.csv"
+MIN_NEW_SAMPLES_FOR_RETRAIN = 100
+# --- Model input columns; the order is the column order fed to each model -
+PRIORITY_FEATURES = [
+    # raw vitals
+    'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'PA', 'Temperature',
+    # threshold flags
+    'SpO2_Severity', 'Tachypnea', 'Bradypnea', 'Tachycardia', 'Bradycardia', 'Critical_Signs',
+    # ratios
+    'SpO2_Temp_Ratio', 'Pouls_PA_Ratio', 'Temp_Pouls_Ratio',
+    # differences
+    'SpO2_PA_Diff', 'SpO2_Temp_Diff', 'PA_Pouls_Diff',
+    # transforms and the rule-based priority
+    'SpO2_Log', 'Temp_Squared', 'Suggested_Priority',
+]
+SERVICE_FEATURES = [
+    # patient profile and raw vitals
+    'Age', 'Sexe', 'Enceinte', 'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'ECG', 'PA', 'Temperature', 'IMC',
+    # categories and threshold flags
+    'Age_Category', 'Temp_Anomaly', 'PA_High', 'PA_Low',
+    # ratios
+    'Pouls_SpO2_Ratio', 'PA_Temp_Ratio', 'IMC_Temp_Ratio',
+]
+# --- Fitted artifacts, filled in by training or by loading from disk ------
+priority_model = service_model = None
+priority_scaler = service_scaler = None
+priority_imputer = service_imputer = None
+label_encoder_service = LabelEncoder()
+# Guards access to the shared models between request handling and retraining.
+model_lock = threading.Lock()
+def enhanced_features(df):
+    """Add the engineered vital-sign features used by the priority and service models.
+
+    The new columns are written into ``df`` in place and the same frame is returned.
+
+    Args:
+        df: DataFrame providing the columns 'Age', 'SpO2', 'Frquce_Rprtr(rpm)',
+            'Pouls', 'PA', 'Temperature' and 'IMC'.
+
+    Returns:
+        ``df`` with the flag, ratio, difference, transform and category columns added.
+    """
+    eps = 1e-6  # keeps the ratio features finite when a denominator is 0
+    age = df['Age']
+    resp_rate = df['Frquce_Rprtr(rpm)']
+    pulse = df['Pouls']
+    pressure = df['PA']
+    temperature = df['Temperature']
+    spo2 = df['SpO2']
+    # Age bands for the age-dependent thresholds. ``under_12`` is also true for
+    # infants, so the child thresholds apply to them as well.
+    infant = age < 1
+    under_12 = age < 12
+    aged_12_plus = age >= 12
+    # Vectorized boolean masks instead of row-wise apply(); comparisons against
+    # NaN are False, so rows with missing values get 0.
+    df['Tachypnea'] = ((infant & (resp_rate > 40)) | (under_12 & (resp_rate > 30)) |
+                       (aged_12_plus & (resp_rate > 20))).astype(int)
+    df['Bradypnea'] = ((infant & (resp_rate < 20)) | (under_12 & (resp_rate < 12)) |
+                       (aged_12_plus & (resp_rate < 8))).astype(int)
+    df['Tachycardia'] = ((infant & (pulse > 160)) | (under_12 & (pulse > 120)) |
+                         (aged_12_plus & (pulse > 100))).astype(int)
+    df['Bradycardia'] = ((infant & (pulse < 90)) | (under_12 & (pulse < 70)) |
+                         (aged_12_plus & (pulse < 50))).astype(int)
+    df['SpO2_Temp_Ratio'] = spo2 / (temperature + eps)
+    df['Pouls_PA_Ratio'] = pulse / (pressure + eps)
+    df['Temp_Pouls_Ratio'] = temperature / (pulse + eps)
+    df['SpO2_PA_Diff'] = spo2 - pressure / 10
+    df['SpO2_Temp_Diff'] = spo2 - temperature
+    df['PA_Pouls_Diff'] = pressure - pulse
+    df['IMC_Temp_Ratio'] = df['IMC'] / (temperature + eps)
+    df['SpO2_Log'] = np.log1p(spo2)
+    df['Temp_Squared'] = temperature ** 2
+    df['Pouls_SpO2_Ratio'] = pulse / (spo2 + eps)
+    df['PA_Temp_Ratio'] = pressure / (temperature + eps)
+    # include_lowest=True puts a value of exactly 0 (e.g. a newborn's age) in the
+    # first bin instead of producing NaN.
+    df['Age_Category'] = pd.cut(age, bins=[0, 1, 12, 45, 65, 120], labels=[0, 1, 2, 3, 4],
+                                include_lowest=True)
+    df['Temp_Anomaly'] = ((temperature < 35) | (temperature > 38)).astype(int)
+    df['PA_High'] = (pressure > 160).astype(int)
+    df['PA_Low'] = (pressure < 90).astype(int)
+    df['SpO2_Severity'] = pd.cut(spo2, bins=[0, 85, 90, 92, 100], labels=[3, 2, 1, 0],
+                                 include_lowest=True)
+    df['Critical_Signs'] = ((spo2 < 85) | (pulse > 150) | (temperature > 40) |
+                            (pressure > 200) | (pressure < 70)).astype(int)
+    return df
+def compute_service_and_priority(row):
+    """Apply the hand-written triage rules to one patient record.
+
+    Args:
+        row: Mapping-like record (dict or DataFrame row) with the keys 'Age', 'SpO2',
+            'Frquce_Rprtr(rpm)', 'Pouls', 'ECG', 'PA', 'Temperature', 'Enceinte' and 'IMC'.
+
+    Returns:
+        A ``(service, priorite)`` tuple: the suggested service name and a priority
+        from 1 to 5, where 1 is assigned by the most extreme thresholds.
+    """
+    keys = ('Age', 'SpO2', 'Frquce_Rprtr(rpm)', 'Pouls', 'ECG', 'PA', 'Temperature', 'Enceinte', 'IMC')
+    age, spo2, resp, pulse, ecg, pa, temp, pregnant, bmi = (row[key] for key in keys)
+
+    def pick_service():
+        # The first matching rule wins.
+        if age <= 18:
+            return 'Pédiatriques'
+        if pregnant:
+            return 'Gynécologie/Obstétrique'
+        if ecg == 1 or pulse < 50 or pulse > 110 or resp > 20:
+            return 'Neurologie'
+        if spo2 < 92 or resp > 18 or pulse > 100 or pa < 90 or pa > 160:
+            return 'Cardiorespiratoire'
+        if (bmi > 30 and 38 < temp <= 40 and 70 <= pulse <= 90) or \
+           (70 <= pulse <= 90 and 110 <= pa <= 130 and spo2 >= 97 and temp <= 37.5):
+            return 'Médecine générale'
+        if temp > 40:
+            return 'Radiothérapie'
+        return 'Chirurgie'
+
+    service = pick_service()
+    # (level, SpO2 floor, temperature cap, pulse cap, PA floor, PA cap, respiratory-rate cap)
+    # Level 1 has no respiratory-rate criterion.
+    ladder = (
+        (1, 85, 40, 150, 70, 200, None),
+        (2, 88, 39.5, 130, 80, 180, 25),
+        (3, 90, 38.5, 110, 90, 160, 20),
+        (4, 92, 38, 100, 100, 140, 18),
+    )
+    priorite = 5
+    for level, spo2_floor, temp_cap, pulse_cap, pa_floor, pa_cap, resp_cap in ladder:
+        if (spo2 < spo2_floor or temp > temp_cap or pulse > pulse_cap or pa < pa_floor or pa > pa_cap
+                or (resp_cap is not None and resp > resp_cap)):
+            priorite = level
+            break
+    return service, priorite
+def get_smote_strategy(y, max_samples=1000):
+    """Build the per-class target sizes passed to SMOTE as ``sampling_strategy``.
+
+    Each class is aimed at twice its current size, with a floor of 100 samples and
+    a ceiling of ``max_samples``.
+
+    Args:
+        y: Iterable of class labels.
+        max_samples: Upper bound for the oversampling target of a class.
+
+    Returns:
+        Dict mapping each class label to its target number of samples. The target
+        is never below the class's current count, because an over-sampler cannot
+        shrink a class. Empty when ``y`` is empty.
+    """
+    class_counts = pd.Series(y).value_counts()
+    strategy = {}
+    for cls, count in class_counts.items():
+        target = min(max_samples, max(count * 2, 100))  # Ensure reasonable class sizes
+        # Classes already larger than max_samples keep their size.
+        strategy[cls] = int(max(count, target))
+    return strategy
+def _load_training_frame():
+    """Read DATASET_PATH, encode 'Sexe', 'Enceinte' and 'ECG' as numbers and add engineered features."""
+    data = pd.read_csv(DATASET_PATH)
+    data['Sexe'] = data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
+    data['Enceinte'] = data['Enceinte'].astype(int)
+    data['ECG'] = data['ECG'].map({'Normal': 0, 'Anormal': 1})
+    return enhanced_features(data)
+
+
+def _build_candidate_models():
+    """Return a fresh name -> unfitted classifier mapping of the candidates to compare."""
+    return {
+        'XGBoost': XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05, n_jobs=-1, random_state=42),
+        'LightGBM': LGBMClassifier(n_estimators=100, max_depth=2, learning_rate=0.05, min_child_samples=5,
+                                  reg_alpha=0.5, reg_lambda=0.5, n_jobs=-1, random_state=42, verbose=-1),
+        'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=8, n_jobs=-1, random_state=42),
+        'LogisticRegression': LogisticRegression(max_iter=1000, multi_class='multinomial', random_state=42),
+        'SVM': SVC(probability=True, random_state=42)
+    }
+
+
+def _resample_training_fold(X_train, y_train):
+    """Undersample the majority class, then run SMOTE with the targets from get_smote_strategy."""
+    min_class_size = pd.Series(y_train).value_counts().min()
+    # Keep k_neighbors below the size of the smallest class (and at least 1).
+    k_neighbors = min(5, max(1, min_class_size - 1))
+    pipeline = Pipeline([
+        ('under', RandomUnderSampler(sampling_strategy='majority', random_state=42)),
+        ('over', SMOTE(sampling_strategy=get_smote_strategy(y_train), random_state=42, k_neighbors=k_neighbors))
+    ])
+    return pipeline.fit_resample(X_train, y_train)
+
+
+def _cross_validate_candidates(models, X_scaled, y, task_label, feature_names, with_recall_p1=False):
+    """Score every candidate with 5-fold stratified cross-validation.
+
+    Args:
+        models: Mapping of model name to estimator; each estimator is refitted per fold.
+        X_scaled: 2-D feature array (already imputed and scaled).
+        y: 1-D array of integer class labels.
+        task_label: 'Priority' or 'Service', used only in log messages.
+        feature_names: Column names matching ``X_scaled``, used for the LightGBM importance log.
+        with_recall_p1: Also track the recall of class 0 (priority 1) as 'recall_p1'.
+
+    Returns:
+        Dict mapping model name to the mean of each tracked metric ('f1', 'time'
+        and optionally 'recall_p1').
+    """
+    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
+    results = {}
+    for name, model in models.items():
+        logger.info(f"\nEvaluating {name} for {task_label}...")
+        scores = {'f1': [], 'time': []}
+        if with_recall_p1:
+            scores['recall_p1'] = []
+        for train_idx, test_idx in tqdm(skf.split(X_scaled, y), total=skf.get_n_splits()):
+            X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
+            y_train, y_test = y[train_idx], y[test_idx]
+            X_train_res, y_train_res = _resample_training_fold(X_train, y_train)
+            class_sizes = pd.Series(y_train_res).value_counts().to_dict()
+            logger.info(f"{name} - Resampled class sizes: {class_sizes}")
+            start_time = time.time()
+            model.fit(X_train_res, y_train_res)
+            scores['time'].append(time.time() - start_time)
+            y_pred = model.predict(X_test)
+            scores['f1'].append(f1_score(y_test, y_pred, average='macro'))
+            if with_recall_p1:
+                scores['recall_p1'].append(recall_score(y_test, y_pred, labels=[0], average=None, zero_division=0)[0])
+                logger.info(f"{name} Fold - F1: {scores['f1'][-1]:.3f}, Recall P1: {scores['recall_p1'][-1]:.3f}")
+        results[name] = {metric: np.mean(values) for metric, values in scores.items()}
+        if name == 'LightGBM':
+            # Importances come from the fit on the last fold.
+            feature_importance = pd.Series(model.feature_importances_, index=feature_names).sort_values(ascending=False)
+            logger.info(f"LightGBM {task_label} Feature Importance:\n{feature_importance}")
+    return results
+
+
+def _save_model(model, prefix):
+    """Dump ``model`` to a timestamped history file and to the stable '<prefix>_model.pkl' path."""
+    timestamp = int(time.time())
+    joblib.dump(model, f'{prefix}_model_{timestamp}.pkl')
+    # The startup code looks for '<prefix>_model.pkl'; without this copy a saved
+    # model is never found again.
+    joblib.dump(model, f'{prefix}_model.pkl')
+
+
+def train_priority_model():
+    """Train, select and persist the priority classifier.
+
+    Compares the candidate classifiers by cross-validation, refits the one with the
+    highest macro-F1 + priority-1 recall on the full dataset, publishes it together
+    with its imputer and scaler in the module globals and writes all three to disk.
+
+    Raises:
+        Exception: any error raised while loading data or training is logged and re-raised.
+    """
+    global priority_model, priority_scaler, priority_imputer
+    try:
+        data = _load_training_frame()
+        data[['Suggested_Service', 'Suggested_Priority']] = data.apply(compute_service_and_priority, axis=1, result_type='expand')
+        data['Suggested_Priority'] = data['Suggested_Priority'].astype(int)
+        X = data[PRIORITY_FEATURES]
+        y = data['Priorite'].values - 1  # Shift to 0-based indexing
+        imputer = SimpleImputer(strategy='median')
+        scaler = StandardScaler()
+        X_scaled = scaler.fit_transform(imputer.fit_transform(X))
+        models = _build_candidate_models()
+        results = _cross_validate_candidates(models, X_scaled, y, 'Priority', PRIORITY_FEATURES, with_recall_p1=True)
+        logger.info("\nPriority Model Comparison:")
+        for name, res in results.items():
+            logger.info(f"{name}: F1={res['f1']:.3f}, Recall P1={res['recall_p1']:.3f}, Time={res['time']:.2f}s")
+        best_model = max(results, key=lambda k: results[k]['f1'] + results[k]['recall_p1'])
+        logger.info(f"Best Priority Model: {best_model}")
+        # Fit outside the lock, then publish model, imputer and scaler together so
+        # readers never wait for training or see a mismatched set.
+        final_model = models[best_model]
+        final_model.fit(X_scaled, y)
+        with model_lock:
+            priority_model = final_model
+            priority_imputer = imputer
+            priority_scaler = scaler
+        _save_model(final_model, 'priority')
+        joblib.dump(scaler, 'priority_scaler.pkl')
+        joblib.dump(imputer, 'priority_imputer.pkl')
+        logger.info("Priority model saved.")
+    except Exception as e:
+        logger.exception(f"Error in priority training: {e}")
+        raise
+
+
+def train_service_model():
+    """Train, select and persist the service classifier.
+
+    Fits the shared label encoder on 'Service_Suivant' (missing values become
+    'Unknown'), compares the candidate classifiers by cross-validated macro-F1,
+    refits the best one on the full dataset, publishes it together with its imputer
+    and scaler in the module globals and writes all artifacts to disk.
+
+    Raises:
+        Exception: any error raised while loading data or training is logged and re-raised.
+    """
+    global service_model, service_scaler, service_imputer, label_encoder_service
+    try:
+        data = _load_training_frame()
+        X = data[SERVICE_FEATURES]
+        y = label_encoder_service.fit_transform(data['Service_Suivant'].fillna('Unknown'))
+        imputer = SimpleImputer(strategy='median')
+        scaler = StandardScaler()
+        X_scaled = scaler.fit_transform(imputer.fit_transform(X))
+        models = _build_candidate_models()
+        results = _cross_validate_candidates(models, X_scaled, y, 'Service', SERVICE_FEATURES)
+        logger.info("\nService Model Comparison:")
+        for name, res in results.items():
+            logger.info(f"{name}: F1={res['f1']:.3f}, Time={res['time']:.2f}s")
+        best_model = max(results, key=lambda k: results[k]['f1'])
+        logger.info(f"Best Service Model: {best_model}")
+        # Fit outside the lock, then publish model, imputer and scaler together.
+        final_model = models[best_model]
+        final_model.fit(X_scaled, y)
+        with model_lock:
+            service_model = final_model
+            service_imputer = imputer
+            service_scaler = scaler
+        _save_model(final_model, 'service')
+        joblib.dump(scaler, 'service_scaler.pkl')
+        joblib.dump(imputer, 'service_imputer.pkl')
+        joblib.dump(label_encoder_service, 'label_encoder_service.pkl')
+        logger.info("Service model saved.")
+    except Exception as e:
+        logger.exception(f"Error in service training: {e}")
+        raise
+def _retrain_from_new_data():
+    """Run one retraining pass if NEW_DATA_FILE holds enough queued samples.
+
+    Refits copies of both models on the original dataset plus the queued rows, swaps
+    them in under ``model_lock``, saves them, archives the queued rows and removes
+    the queue file. Does nothing when the queue is missing, empty or shorter than
+    MIN_NEW_SAMPLES_FOR_RETRAIN. The existing imputers, scalers and label encoder
+    are reused unchanged.
+    """
+    global priority_model, service_model
+    import copy
+    if not (os.path.exists(NEW_DATA_FILE) and os.path.getsize(NEW_DATA_FILE) > 0):
+        return
+    new_data = pd.read_csv(NEW_DATA_FILE)
+    if len(new_data) < MIN_NEW_SAMPLES_FOR_RETRAIN:
+        return
+    orig_data = pd.read_csv(DATASET_PATH)
+    orig_data['Sexe'] = orig_data['Sexe'].map({'Masculin': 0, 'Feminin': 1})
+    orig_data['Enceinte'] = orig_data['Enceinte'].astype(int)
+    orig_data['ECG'] = orig_data['ECG'].map({'Normal': 0, 'Anormal': 1})
+    # Both parts need the engineered columns; otherwise the original rows enter the
+    # concat with NaN in most model features.
+    orig_data = enhanced_features(orig_data)
+    new_data = enhanced_features(new_data)
+    combined_data = pd.concat([orig_data, new_data], ignore_index=True)
+    # 'Suggested_Priority' is a priority feature; compute it the same way for every row.
+    combined_data['Suggested_Priority'] = combined_data.apply(
+        lambda row: compute_service_and_priority(row)[1], axis=1).astype(int)
+    # Priority retraining
+    X_priority = priority_scaler.transform(priority_imputer.transform(combined_data[PRIORITY_FEATURES]))
+    y_priority = combined_data['Priorite'].values - 1
+    # Fit copies so /predict is not blocked on model_lock for the whole training time.
+    new_priority_model = copy.deepcopy(priority_model)
+    new_priority_model.fit(X_priority, y_priority)
+    # Service retraining
+    service_labels = combined_data['Service_Suivant'].fillna('Unknown')
+    # LabelEncoder.transform raises on labels it was not fitted on; drop those rows
+    # instead of failing every retraining pass.
+    known = service_labels.isin(label_encoder_service.classes_)
+    if not known.all():
+        logger.warning(f"Skipping {int((~known).sum())} rows whose service label is unknown to the encoder.")
+    X_service = service_scaler.transform(service_imputer.transform(combined_data.loc[known, SERVICE_FEATURES]))
+    y_service = label_encoder_service.transform(service_labels[known])
+    new_service_model = copy.deepcopy(service_model)
+    new_service_model.fit(X_service, y_service)
+    with model_lock:
+        priority_model = new_priority_model
+        service_model = new_service_model
+    timestamp = int(time.time())
+    joblib.dump(new_priority_model, f'priority_model_{timestamp}.pkl')
+    joblib.dump(new_service_model, f'service_model_{timestamp}.pkl')
+    # Also refresh the stable paths that the startup code loads.
+    joblib.dump(new_priority_model, 'priority_model.pkl')
+    joblib.dump(new_service_model, 'service_model.pkl')
+    new_data.to_csv(f'archive_new_data_{timestamp}.csv', index=False)
+    # Remove the queue instead of truncating it: an existing-but-empty file makes
+    # the /predict handler append rows without a header line.
+    os.remove(NEW_DATA_FILE)
+    logger.info("Models retrained and saved.")
+
+
+@retry(wait=wait_fixed(2), stop=stop_after_attempt(3))
+def retrain_models():
+    """Background loop: once an hour, retrain both models from the queued predictions.
+
+    Never returns. Errors from a retraining pass are logged and the loop continues,
+    so the ``@retry`` wrapper only comes into play for errors that escape the loop.
+    """
+    while True:
+        time.sleep(3600)
+        try:
+            _retrain_from_new_data()
+        except Exception as e:
+            logger.exception(f"Error in retrain: {e}")
+def _parse_flag(value):
+    """Interpret a JSON boolean, number or common yes/no string as a bool."""
+    if isinstance(value, str):
+        # bool("false") is True, so strings are matched against explicit "yes" spellings.
+        return value.strip().lower() in ('1', 'true', 'yes', 'oui', 'vrai')
+    return bool(value)
+
+
+@app.route('/predict', methods=['POST'])
+def predict():
+    """Predict the triage priority and the next service for one patient.
+
+    Expects a JSON object with the fields 'age', 'sexe', 'enceinte', 'spo2',
+    'freq_resp', 'pouls', 'ecg', 'pa', 'temperature' and 'imc'. The rule-based result
+    replaces a model prediction whose confidence is below 0.7; critical signs always
+    force the rule-based priority and pregnancy always routes to
+    'Gynécologie/Obstétrique'. Every answered request is appended to NEW_DATA_FILE.
+
+    Returns:
+        200 with 'priority', 'service_suivant', 'priority_confidence' and
+        'service_confidence'; 400 for a malformed body or field; 503 when the models
+        are not loaded; 500 for any other error.
+    """
+    try:
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            return jsonify({'error': 'Request body must be a JSON object'}), 400
+        required_fields = ['age', 'sexe', 'enceinte', 'spo2', 'freq_resp', 'pouls', 'ecg', 'pa', 'temperature', 'imc']
+        missing_fields = [field for field in required_fields if field not in data]
+        if missing_fields:
+            return jsonify({'error': f'Missing fields: {", ".join(missing_fields)}'}), 400
+        try:
+            input_data = {
+                'Age': float(data['age']),
+                'Sexe': 0 if str(data['sexe']).lower() == 'masculin' else 1,
+                'Enceinte': 1 if _parse_flag(data['enceinte']) else 0,
+                'SpO2': float(data['spo2']),
+                'Frquce_Rprtr(rpm)': float(data['freq_resp']),
+                'Pouls': float(data['pouls']),
+                'ECG': 0 if str(data['ecg']).lower() == 'normal' else 1,
+                'PA': float(data['pa']),
+                'Temperature': float(data['temperature']),
+                'IMC': float(data['imc']),
+            }
+        except (TypeError, ValueError) as e:
+            # A bad value from the client is a client error, not a server failure.
+            return jsonify({'error': f'Invalid field value: {e}'}), 400
+        artifacts = (priority_model, service_model, priority_scaler, service_scaler,
+                     priority_imputer, service_imputer)
+        if any(artifact is None for artifact in artifacts):
+            return jsonify({'error': 'Models are not loaded yet'}), 503
+        input_df = pd.DataFrame([input_data])
+        input_df = enhanced_features(input_df)
+        suggested_service, suggested_priority = compute_service_and_priority(input_df.iloc[0])
+        input_df['Suggested_Priority'] = suggested_priority
+        with model_lock:
+            # Priority prediction
+            priority_input = input_df[PRIORITY_FEATURES]
+            priority_imputed = priority_imputer.transform(priority_input)
+            priority_scaled = priority_scaler.transform(priority_imputed)
+            priority_probs = priority_model.predict_proba(priority_scaled)[0]
+            priority_pred = np.argmax(priority_probs) + 1  # classes are 0-based, priorities 1-based
+            priority_conf = float(max(priority_probs))
+            # Service prediction
+            service_input = input_df[SERVICE_FEATURES]
+            service_imputed = service_imputer.transform(service_input)
+            service_scaled = service_scaler.transform(service_imputed)
+            service_probs = service_model.predict_proba(service_scaled)[0]
+            service_pred_idx = np.argmax(service_probs)
+            service_pred = label_encoder_service.inverse_transform([service_pred_idx])[0]
+            service_conf = float(max(service_probs))
+        # Fallback to rule-based logic if confidence is low or critical conditions apply
+        is_pregnant = input_df['Enceinte'][0] == 1
+        if priority_conf < 0.7 or input_df['Critical_Signs'][0] == 1:
+            priority_pred = suggested_priority
+        if service_conf < 0.7 or is_pregnant:
+            service_pred = 'Gynécologie/Obstétrique' if is_pregnant else suggested_service
+        input_df['Priorite'] = priority_pred
+        input_df['Service_Suivant'] = service_pred
+        # Write the header when the file is missing or empty, so the CSV always
+        # starts with column names.
+        needs_header = not os.path.exists(NEW_DATA_FILE) or os.path.getsize(NEW_DATA_FILE) == 0
+        input_df.to_csv(NEW_DATA_FILE, mode='a', header=needs_header, index=False)
+        logger.info(f"Predicted: service={service_pred}, priority={priority_pred}, service_conf={service_conf}, priority_conf={priority_conf}")
+        return jsonify({
+            'priority': int(priority_pred),
+            'service_suivant': str(service_pred),
+            'priority_confidence': priority_conf,
+            'service_confidence': service_conf
+        })
+    except Exception as e:
+        logger.exception(f"Prediction error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+def _find_model_file(prefix):
+    """Return '<prefix>_model.pkl' if it exists, else the newest '<prefix>_model_*.pkl', else None."""
+    import glob
+    stable_path = f'{prefix}_model.pkl'
+    if os.path.exists(stable_path):
+        return stable_path
+    # Training writes timestamped files; fall back to the most recently written one.
+    candidates = glob.glob(f'{prefix}_model_*.pkl')
+    return max(candidates, key=os.path.getmtime) if candidates else None
+
+
+if __name__ == '__main__':
+    # Defaults to retraining on every start; set FORCE_RETRAIN=0 to reuse saved artifacts.
+    FORCE_RETRAIN = os.environ.get('FORCE_RETRAIN', '1').strip().lower() not in ('0', 'false', 'no')
+    priority_model_path = _find_model_file('priority')
+    service_model_path = _find_model_file('service')
+    support_files = ['priority_scaler.pkl', 'service_scaler.pkl', 'priority_imputer.pkl',
+                     'service_imputer.pkl', 'label_encoder_service.pkl']
+    # Loading needs every artifact, not just the two model files.
+    artifacts_ready = (priority_model_path is not None and service_model_path is not None
+                       and all(os.path.exists(path) for path in support_files))
+    if FORCE_RETRAIN or not artifacts_ready:
+        train_priority_model()
+        train_service_model()
+    else:
+        with model_lock:
+            priority_model = joblib.load(priority_model_path)
+            service_model = joblib.load(service_model_path)
+            priority_scaler = joblib.load('priority_scaler.pkl')
+            service_scaler = joblib.load('service_scaler.pkl')
+            priority_imputer = joblib.load('priority_imputer.pkl')
+            service_imputer = joblib.load('service_imputer.pkl')
+            label_encoder_service = joblib.load('label_encoder_service.pkl')
+    # Daemon thread: the hourly retraining loop must not keep the process alive on shutdown.
+    retrain_thread = threading.Thread(target=retrain_models, daemon=True)
+    retrain_thread.start()
+    # The port can be overridden through the PORT environment variable; 5000 stays the default.
+    app.run(debug=False, host='0.0.0.0', port=int(os.environ.get('PORT', '5000')))

requirements.txt CHANGED Viewed

@@ -1,6 +1,11 @@
-faicons
-shiny
-shinywidgets
-plotly
 pandas
-ridgeplot

+flask
+flask-cors
 pandas
+numpy
+scikit-learn
+xgboost
+lightgbm
+imblearn
+joblib
+tqdm
+tenacity