Spaces:

FRANCKYPRO
/

doctolib

Sleeping

App Files Files Community

FRANCKYPRO committed on Jun 17, 2025

Commit

dc1ed2e

verified ·

1 Parent(s): e7deb5a

Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +1 -0
app.py +311 -0
data/dataset_f1.csv +3 -0
models/xgb_oversampled_model.joblib +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/dataset_f1.csv filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,311 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import joblib
+import requests
+import seaborn as sns
+import matplotlib.pyplot as plt
+import io
+from sqlalchemy import create_engine, text
+from sqlalchemy import create_engine
+import os
+from pathlib import Path
+# --- Database connection ---
+# The URL can be overridden through the DATABASE_URL environment variable; the
+# original local MySQL URL remains the default so existing setups keep working.
+engine = create_engine(os.environ.get("DATABASE_URL", "mysql+pymysql://root:@localhost/doctolib"))
+# --- Model loading ---
+# Look for the model in the models/ directory next to this script first (that
+# is where this commit stores it), then fall back to the original '../models'
+# location, which is resolved against the current working directory.
+_MODEL_FILENAME = "xgb_oversampled_model.joblib"
+_model_path = Path(__file__).resolve().parent / "models" / _MODEL_FILENAME
+if not _model_path.is_file():
+    _model_path = Path("..") / "models" / _MODEL_FILENAME
+model = joblib.load(_model_path)
+# --- Page configuration ---
+st.set_page_config(page_title="Doctolib Annulation Prediction", layout="wide", page_icon="🩺")
+# --- Doctolib-coloured styling, including a background image on the sidebar ---
+_PAGE_CSS = """
+    <style>
+        [data-testid="stSidebar"] > div:first-child {
+            background-image: url('https://assets.entrepreneur.com/content/3x2/2000/1623253746-GettyImages-1273886962.jpg');
+            background-size: cover;
+            background-position: center;
+            padding-top: 60px;
+        }
+        [data-testid="stSidebar"] .css-ng1t4o {
+            background-color: rgba(0, 123, 255, 0.8);
+            border-radius: 12px;
+            padding: 10px;
+            color: white;
+        }
+        [data-testid="stSidebar"] .stSelectbox > div > div {
+            background-color: white;
+            color: #007bff;
+            border-radius: 8px;
+        }
+        .main { padding: 20px; }
+        h1, h2, h3, h4 { color: #0069d9; text-align: center; animation: fadeIn 1.5s ease-in-out; }
+        .stButton>button { background: linear-gradient(90deg, #0069d9, #2b9cd8); color: white; border-radius: 25px; padding: 12px 25px; font-size: 16px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); border: none; }
+        .stButton>button:hover { background: linear-gradient(90deg, #2b9cd8, #0069d9); }
+        .highlight-box { background-color: #ffffff; border-left: 8px solid #0069d9; border-radius: 12px; padding: 25px; box-shadow: 0 4px 20px rgba(0,0,0,0.1); margin-bottom: 20px; }
+        @keyframes fadeIn { from { opacity: 0; transform: translateY(-20px); } to { opacity: 1; transform: translateY(0); } }
+        .footer { text-align: center; margin-top: 50px; font-size: 12px; color: #0069d9; }
+    </style>
+"""
+# The stylesheet is raw HTML, so Streamlit must be told not to escape it.
+st.markdown(_PAGE_CSS, unsafe_allow_html=True)
+# --- Sidebar menu (displayed on top of the illustrated sidebar background) ---
+_MENU_OPTIONS = [
+    "Prédiction Temps Réel",
+    "Classification sur CSV",
+    "Système Automatique (Notifications)",
+    "Tableaux de bord statistiques",
+]
+menu = st.sidebar.selectbox("Menu", _MENU_OPTIONS)
+# --- Logo ---
+_LOGO_URL = "https://www.osteo-var.com/wp-content/uploads/2019/07/logo-doctolib.png"
+st.image(_LOGO_URL, width=300)
+# --- Decorative banner ---
+_BANNER_HTML = """
+<div class="highlight-box" style="text-align:center; animation: fadeIn 2s ease-in-out; color: #0069d9;">
+    <h3>🩺 Prévoyez mieux. Évitez les annulations. Améliorez votre planning.</h3>
+    <p>Notre application vous aide à prédire et prévenir les absences, pour une meilleure organisation médicale.</p>
+</div>
+"""
+st.markdown(_BANNER_HTML, unsafe_allow_html=True)
+st.title("Application Doctolib – Prédiction des Annulations de Rendez-vous")
+# Feature columns, in the order in which they are selected before being passed to the model.
+required_cols = [
+    "Scholarship",
+    "Hypertension",
+    "Diabetes",
+    "Alcoholism",
+    "Disability",
+    "Days_Between_Scheduling_and_Appointment",
+    "Hospital_Area",
+    "Specialty",
+    "Facility_Type",
+    "Distance_km",
+    "Type_of_Care",
+    "Previously_Treated",
+    "Age",
+    "Social_Status",
+    "SMS_Received",
+    "Weather_Conditions",
+    "Appointment_Time",
+    "Gender",
+    "Consultations_Last_12_Months",
+    "Waiting_Time_Minutes",
+    "Hospital_Rating",
+    "Average_Fee",
+    "Number_days",
+]
+# Label -> numeric code for every categorical feature.
+category_mappings = {
+    "Hospital_Area": {
+        "Pigalle": 13760,
+        "Bastille": 13887,
+        "Saint-Germain": 13846,
+        "Belleville": 13885,
+        "La Défense": 13835,
+        "Châtelet": 13768,
+        "Montparnasse": 13810,
+    },
+    "Specialty": {
+        "Pédiatrie": 15772,
+        "Gynécologie": 15785,
+        "Dermatologie": 15697,
+        "Cardiologie": 15892,
+        "Psychiatrie": 15771,
+        "Neurologie": 15778,
+        "Ophtalmologie": 15832,
+    },
+    "Facility_Type": {
+        "Conventionné": 0,
+        "Non conventionné": 1,
+    },
+    "Type_of_Care": {
+        "Vaccination": 21941,
+        "Urgence": 22224,
+        "Suivi": 22173,
+        "Bilan": 22018,
+        "Consultation": 22171,
+    },
+    "Social_Status": {
+        "Indépendant": 22195,
+        "Étudiant": 21999,
+        "Retraité": 22048,
+        "Sans emploi": 22007,
+        "Salarié": 22278,
+    },
+    "Gender": {
+        "Homme": 1,
+        "Femme": 0,
+    },
+}
+# Inverse lookup (numeric code -> label) for every categorical feature.
+reverse_mappings = {
+    feature: dict(zip(codes.values(), codes.keys()))
+    for feature, codes in category_mappings.items()
+}
+def encode_categories(df):
+    """Return a copy of ``df`` with categorical columns turned into numeric codes.
+
+    Every column listed in ``category_mappings`` that is present in ``df`` is
+    translated label -> code. A value that already is one of the column's
+    codes is kept unchanged, so encoding an already-encoded frame leaves it
+    intact. Any other value (unknown label, missing value) becomes 0.
+
+    The input frame itself is not modified.
+    """
+    encoded = df.copy()
+    for col, mapping in category_mappings.items():
+        if col not in encoded.columns:
+            continue
+        values = encoded[col]
+        from_labels = values.map(mapping)
+        # Values that are already valid codes would map to NaN and be zeroed;
+        # carry them over instead.
+        is_code = values.isin(list(mapping.values()))
+        from_codes = pd.to_numeric(values.where(is_code), errors='coerce')
+        encoded[col] = from_labels.fillna(from_codes).fillna(0)
+    return encoded
+def decode_categories(df):
+    """Return a copy of ``df`` with numeric category codes turned back into labels.
+
+    Every column listed in ``reverse_mappings`` that is present in ``df`` is
+    translated code -> label. Values with no entry in the reverse mapping
+    (for example cells that already hold a label) are left as they are.
+
+    The input frame itself is not modified.
+    """
+    decoded = df.copy()
+    for col, mapping in reverse_mappings.items():
+        if col in decoded.columns:
+            # map() yields NaN for unknown values; fall back to the original cell.
+            decoded[col] = decoded[col].map(mapping).fillna(decoded[col])
+    return decoded
+def seconds_to_time(seconds):
+    """Format a number of seconds as an ``HH:MM:SS`` string.
+
+    Args:
+        seconds: Number of seconds. Integer-valued floats (e.g. ``32400.0``)
+            are accepted and truncated to an integer before formatting.
+
+    Returns:
+        The zero-padded ``HH:MM:SS`` representation. Hours are not wrapped
+        at 24, so 90000 seconds gives ``"25:00:00"``.
+    """
+    # The ':02d' format code rejects floats, so normalise to int first.
+    total = int(seconds)
+    hours, remainder = divmod(total, 3600)
+    minutes, secs = divmod(remainder, 60)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d}"
+def time_to_seconds(time_str):
+    """Convert a clock time written as ``HH:MM`` or ``HH:MM:SS`` into seconds.
+
+    Args:
+        time_str: The time to parse. Non-string values are converted with
+            ``str()`` first, and surrounding whitespace is ignored.
+
+    Returns:
+        ``hours * 3600 + minutes * 60 + seconds`` as an int.
+
+    Raises:
+        ValueError: If the value does not have two or three ``:``-separated
+            integer fields, or if a field is out of range (hours 0-23,
+            minutes and seconds 0-59).
+    """
+    error = "Format d'heure invalide. Utilisez HH:MM ou HH:MM:SS"
+    parts = str(time_str).strip().split(':')
+    if len(parts) not in (2, 3):
+        raise ValueError(error)
+    try:
+        h, m, *rest = map(int, parts)
+    except ValueError:
+        # Report non-numeric fields with the same message as a bad layout.
+        raise ValueError(error) from None
+    s = rest[0] if rest else 0
+    if not (0 <= h <= 23 and 0 <= m <= 59 and 0 <= s <= 59):
+        raise ValueError(error)
+    return h * 3600 + m * 60 + s
+# French display label for each feature column (used as the widget captions).
+french_labels = {
+    "Scholarship": "Bourse d'étude",
+    "Hypertension": "Hypertension",
+    "Diabetes": "Diabète",
+    "Alcoholism": "Alcoolisme",
+    "Disability": "Handicap",
+    "Days_Between_Scheduling_and_Appointment": "Jours entre la prise et le rendez-vous",
+    "Hospital_Area": "Zone hospitalière",
+    "Specialty": "Spécialité",
+    "Facility_Type": "Type d'établissement",
+    "Distance_km": "Distance en km",
+    "Type_of_Care": "Type de soin",
+    "Previously_Treated": "Déjà traité",
+    "Age": "Âge",
+    "Social_Status": "Statut social",
+    "SMS_Received": "SMS reçu",
+    "Weather_Conditions": "Conditions météorologiques (0=Favorable, 1=Défavorable)",
+    "Appointment_Time": "Heure du rendez-vous",
+    "Gender": "Genre",
+    "Consultations_Last_12_Months": "Consultations sur 12 mois",
+    "Waiting_Time_Minutes": "Temps d'attente (min)",
+    "Hospital_Rating": "Note de l'hôpital",
+    "Average_Fee": "Frais moyens",
+    "Number_days": "Nombre de jours",
+}
+# --- Real-time prediction: one input widget per model feature, captioned in French ---
+if menu == "Prédiction Temps Réel":
+    st.subheader("Prédiction en Temps Réel")
+    user_input = {}
+    # Validation messages collected while building the form; any entry
+    # prevents the prediction from running.
+    input_errors = []
+    booking_date = st.date_input("Date de prise de rendez-vous")
+    appointment_date = st.date_input("Date du rendez-vous")
+    number_days = (appointment_date - booking_date).days
+    if number_days < 0:
+        input_errors.append("La date du rendez-vous ne peut pas précéder la date de prise de rendez-vous.")
+    user_input['Number_days'] = number_days
+    binary_cols = ['Scholarship', 'Hypertension', 'Diabetes', 'Alcoholism', 'Disability', 'SMS_Received', 'Previously_Treated']
+    for col in required_cols:
+        if col == 'Number_days':
+            # Already derived from the two dates above.
+            continue
+        label = french_labels.get(col, col)
+        if col == 'Appointment_Time':
+            time_str = st.text_input(f"{label} (HH:MM ou HH:MM:SS)", value="09:00")
+            try:
+                user_input[col] = time_to_seconds(time_str)
+            except ValueError as exc:
+                # A malformed time must be reported, not abort the whole page.
+                input_errors.append(f"{label} : {exc}")
+        elif col in binary_cols:
+            user_input[col] = st.selectbox(f"{label} (Oui=1, Non=0)", [0, 1])
+        elif col in category_mappings:
+            user_input[col] = st.selectbox(label, list(category_mappings[col].keys()))
+        else:
+            user_input[col] = st.number_input(label, value=0)
+    for message in input_errors:
+        st.error(message)
+    if st.button("Lancer la prédiction") and not input_errors:
+        input_df = pd.DataFrame([user_input])
+        input_df = encode_categories(input_df)
+        # Select the model's columns in order and coerce everything to numbers.
+        input_df = input_df[required_cols].apply(pd.to_numeric, errors='coerce').fillna(0)
+        prediction = model.predict(input_df)[0]
+        probas = model.predict_proba(input_df)[0]
+        st.success(f"Résultat : {'Annulation probable' if prediction == 1 else 'Présence probable'}")
+        st.write(f"Probabilité d'annulation : {probas[1]*100:.2f}%")
+elif menu == "Classification sur CSV":
+    # --- Batch classification of an uploaded CSV file ---
+    st.subheader("Classification en Masse (CSV)")
+    uploaded_file = st.file_uploader("Téléverser un fichier CSV (avec colonnes exactes)", type=["csv"])
+    if uploaded_file:
+        st.write(" Fichier reçu côté Streamlit :")
+        st.write(f"Nom du fichier : {uploaded_file.name}")
+        st.write(f"Type de fichier : {uploaded_file.type}")
+        st.write(f"Taille : {uploaded_file.size} octets")
+        def highlight_proba(val):
+            """Uniform light-blue background for the probability column."""
+            return 'background-color: lightblue; color: black;'
+        def highlight_prediction(val):
+            """Red cell when the prediction is 1, green otherwise."""
+            if val == 1:
+                return 'background-color: red; color: white;'
+            return 'background-color: green; color: white;'
+        def style_cells(styler, func, subset):
+            """Apply ``func`` cell-wise; prefer Styler.map, which replaces the deprecated applymap."""
+            cellwise = getattr(styler, "map", None) or styler.applymap
+            return cellwise(func, subset=subset)
+        try:
+            df_original = pd.read_csv(uploaded_file)
+            df = df_original.copy()
+            st.write(" Aperçu des premières lignes :")
+            st.dataframe(df.head())
+            st.write(" Colonnes détectées :", df.columns.tolist())
+            # Derive Number_days when both date columns are provided.
+            if 'Appointment_Booking_Date' in df.columns and 'Appointment_Date' in df.columns:
+                df['Appointment_Booking_Date'] = pd.to_datetime(df['Appointment_Booking_Date'])
+                df['Appointment_Date'] = pd.to_datetime(df['Appointment_Date'])
+                df['Number_days'] = (df['Appointment_Date'] - df['Appointment_Booking_Date']).dt.days
+            if 'Appointment_Time' in df.columns:
+                df['Appointment_Time'] = df['Appointment_Time'].apply(time_to_seconds)
+            # Fail with an explicit message instead of a raw KeyError.
+            missing_cols = [col for col in required_cols if col not in df.columns]
+            if missing_cols:
+                raise ValueError("colonnes requises manquantes : " + ", ".join(missing_cols))
+            df_encoded = encode_categories(df)
+            df_encoded = df_encoded[required_cols].apply(pd.to_numeric, errors='coerce').fillna(0)
+            predictions = model.predict(df_encoded)
+            probas = model.predict_proba(df_encoded)[:,1]
+            df_original['prediction'] = predictions
+            df_original['proba_annulation'] = probas
+            if 'Appointment_Time' in df_original.columns:
+                # Show the time back in HH:MM:SS form in the result table.
+                df_original['Appointment_Time'] = df['Appointment_Time'].apply(seconds_to_time)
+            df_original = decode_categories(df_original)
+            st.success("✅ Prédictions terminées ! Voici les résultats :")
+            styled_df = style_cells(df_original.style, highlight_proba, ['proba_annulation'])
+            styled_df = style_cells(styled_df, highlight_prediction, ['prediction'])
+            st.dataframe(styled_df)
+            csv_data = df_original.to_csv(index=False).encode('utf-8')
+            excel_buffer = io.BytesIO()
+            with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
+                df_original.to_excel(writer, index=False, sheet_name='Predictions')
+            excel_data = excel_buffer.getvalue()
+            st.download_button("Télécharger en CSV", csv_data, file_name="predictions.csv", mime="text/csv")
+            st.download_button("Télécharger en Excel", excel_data, file_name="predictions.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
+        except Exception as e:
+            # Page-level boundary: report any processing failure to the user.
+            st.error(f" Erreur lors du traitement du fichier : {e}")
+elif menu == "Système Automatique (Notifications)":
+    # --- Score pending appointments from the local API and notify the risky ones ---
+    st.subheader("Système Automatique avec Notifications")
+    st.write("⚠ Note : Cette fonction contacte une API locale. Assurez-vous que l'API est active et accepte les requêtes locales sans restriction (vérifiez les CORS et les permissions).")
+    if st.button("Vérifier les rendez-vous à risque"):
+        # Seconds; without a timeout a stalled API would block the page forever.
+        api_timeout = 10
+        try:
+            response = requests.get("http://localhost:8000/pending_appointments", timeout=api_timeout)
+            response.raise_for_status()
+            data = response.json()
+            st.write(" Réponse reçue de l'API:", data)
+            for appt in data['appointments']:
+                input_df = pd.DataFrame([appt['features']])
+                input_df = encode_categories(input_df)
+                input_df = input_df[required_cols].apply(pd.to_numeric, errors='coerce').fillna(0)
+                prediction = model.predict(input_df)[0]
+                if prediction == 1:
+                    notif_response = requests.post("http://localhost:8000/send_notification", json={"appointment_id": appt['id']}, timeout=api_timeout)
+                    st.write(f"➡ Notification POST response: {notif_response.text}")
+                    if notif_response.status_code == 200:
+                        result = notif_response.json()
+                        st.success(f"Notification envoyée pour le rendez-vous ID {appt['id']} - Statut: {result.get('status', 'OK')}")
+                    else:
+                        st.error(f"Erreur d'envoi pour ID {appt['id']} : {notif_response.status_code}, réponse : {notif_response.text}")
+                else:
+                    st.info(f"Aucun risque détecté pour le rendez-vous ID {appt['id']}")
+        except requests.exceptions.RequestException as e:
+            st.error(f"Erreur lors de la récupération ou de l'envoi : {e}")
+        except (KeyError, TypeError, ValueError) as e:
+            # Missing keys, wrong payload shape or undecodable JSON.
+            st.error(f"Réponse de l'API invalide ou incomplète : {e}")
+# === DASHBOARD ===
+elif menu == "Tableaux de bord statistiques":
+    st.subheader("📊 Statistiques des rendez-vous depuis la base de données")
+    def show_figure(fig):
+        """Render ``fig`` in the page, then close it so figures do not pile up across reruns."""
+        try:
+            st.pyplot(fig)
+        finally:
+            plt.close(fig)
+    try:
+        with engine.connect() as conn:
+            df = pd.read_sql(text("SELECT * FROM appointments"), conn)
+        # Bar chart: number of appointments per specialty.
+        st.markdown("### Nombre de rendez-vous par spécialité")
+        fig1, ax1 = plt.subplots()
+        df['specialty'].value_counts().plot(kind='bar', color='#2b9cd8', ax=ax1)
+        ax1.set_ylabel("Nombre de rendez-vous")
+        ax1.set_xlabel("Spécialité")
+        ax1.set_title("Répartition par spécialité")
+        show_figure(fig1)
+        # Pie chart: share of each appointment status.
+        st.markdown("### Statut des rendez-vous")
+        fig2, ax2 = plt.subplots()
+        df['status'].value_counts().plot.pie(autopct='%1.1f%%', colors=["#0069d9", "#28a745", "#dc3545"], ax=ax2)
+        ax2.set_ylabel("")
+        ax2.set_title("Répartition par statut")
+        show_figure(fig2)
+        # Horizontal count plot: hospital areas, most frequent first.
+        st.markdown("### Répartition par zone hospitalière")
+        fig3, ax3 = plt.subplots()
+        sns.countplot(data=df, y="hospital_area", palette="Blues_r", order=df['hospital_area'].value_counts().index, ax=ax3)
+        ax3.set_title("Zones hospitalières les plus utilisées")
+        show_figure(fig3)
+        # Histogram with density curve: patient ages.
+        st.markdown("### Âge des patients")
+        fig4, ax4 = plt.subplots()
+        sns.histplot(df['age'], bins=20, kde=True, color='#007bff', ax=ax4)
+        ax4.set_title("Distribution des âges des patients")
+        show_figure(fig4)
+    except Exception as e:
+        # Page-level boundary: report database or plotting failures to the user.
+        st.error(f"Erreur lors du chargement des données : {e}")
+# --- Footer ---
+_FOOTER_HTML = """
+<div class="footer">
+    © 2025 Doctolib Predictor | Créé pour améliorer la santé numérique
+</div>
+"""
+st.markdown(_FOOTER_HTML, unsafe_allow_html=True)
+# --- End of the Streamlit application ---

data/dataset_f1.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a8b682bd0855326fd6636f7217282045ac64cbe537ad416b4280c94621850808
+size 15307746

models/xgb_oversampled_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb7510bc9e28e0146724d8177d2038a88d2a5a445d08521d685c1b5febfc3eec
+size 121682