mappi / Data.py
yannthur's picture
Update Data.py (#1)
2beacbe verified
raw
history blame
6.95 kB
import json
import os

import pandas as pd
import requests
import streamlit as st
# Dropbox access token, read from the environment so that the secret never
# lives in source control (set DROPBOX_ACCESS_TOKEN in the deployment's
# secret/variable settings).
# SECURITY: a token was previously hard-coded on this line; it must be treated
# as compromised and revoked from the Dropbox App Console.
ACCESS_TOKEN = os.environ.get("DROPBOX_ACCESS_TOKEN", "")
FILE_PATH = "/data.xlsx"  # Path of the file inside Dropbox
# Download the Excel workbook from Dropbox into the working directory.
def download_file_from_dropbox():
    """Fetch ``FILE_PATH`` from Dropbox and save it locally as ``data.xlsx``.

    Returns:
        The local file name (``"data.xlsx"``) on success, or ``None`` when the
        token is missing, the request fails, or Dropbox answers with a
        non-200 status. Failures are reported to the user with ``st.error``.
    """
    if not ACCESS_TOKEN:
        st.error("Erreur de téléchargement Dropbox : jeton d'accès manquant")
        return None

    headers = {
        "Authorization": f"Bearer {ACCESS_TOKEN}",
        # json.dumps escapes quotes and non-ASCII characters, which a
        # hand-built string does not.
        "Dropbox-API-Arg": json.dumps({"path": FILE_PATH}),
    }
    url = "https://content.dropboxapi.com/2/files/download"
    local_path = "data.xlsx"

    try:
        # The context manager releases the streamed connection; the timeout
        # prevents the page from hanging forever on a stalled request.
        with requests.post(url, headers=headers, stream=True, timeout=30) as response:
            if response.status_code != 200:
                st.error(f"Erreur de téléchargement Dropbox : {response.status_code}")
                return None
            with open(local_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
    except (requests.RequestException, OSError) as exc:
        # Network failures and local write errors are shown instead of
        # crashing the page.
        st.error(f"Erreur de téléchargement Dropbox : {exc}")
        return None

    return local_path
# Vaccine names known to the application.
liste_vaccin = [
    'YELLOW FEVER',
    'Penta',
    'VPO',
    'VPI',
    'nOPV2',
    'TD',
    'Mosquirix',
    'PCV13',
    'MEASLES',
    'BCG',
    'Janssen',
    'Rota',
    'MENINGO',
    'HPV',
    'Mencevax',
]
# Filtered preview of the dataset; stays None until a filter is applied.
data_ = None
# Initialization: make sure every session-state slot read by this page exists,
# defaulting each missing one to None.
for _state_key in ('data', 'data_file', 'auto_clean_state', 'dataframe_mappi_region'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
# Data loading (wrapped in Streamlit's data cache)
@st.cache_data
def load_data(data_file):
    """Read the Excel workbook ``data_file`` and return it as a DataFrame."""
    return pd.read_excel(data_file)
# Removal of entirely empty columns
def empty_columns_cleaner(data):
    """Drop, in place, every column of ``data`` whose values are all missing.

    Column labels are used as-is rather than being converted to strings, so
    non-string labels (integers, dates, ...) are handled correctly.

    Args:
        data: DataFrame to clean; it is modified in place.

    Returns:
        The list of labels of the removed columns, in column order.
    """
    # Boolean mask, one entry per column: True when every value is NA.
    all_missing = data.isna().all(axis=0)
    removed = data.columns[all_missing.to_numpy()].tolist()
    data.drop(columns=removed, inplace=True)
    return removed
# Removal of columns holding a single distinct value
def remove_one_columns_values(data):
    """Drop, in place, every column of ``data`` with exactly one distinct value.

    Missing values count as a value of their own, so an all-NA column is
    removed while a column mixing one value with NA is kept. Column labels
    are used as-is rather than being converted to strings, so non-string
    labels are handled correctly.

    Args:
        data: DataFrame to clean; it is modified in place.

    Returns:
        The list of labels of the removed columns, in column order.
    """
    # dropna=False matches Series.unique(), which also reports NaN.
    single_valued = data.nunique(dropna=False) == 1
    removed = data.columns[single_valued.to_numpy()].tolist()
    data.drop(columns=removed, inplace=True)
    return removed
## Data import and handling
# Reuse the file already recorded for this session; download it from Dropbox
# only when none has been recorded yet.
data_file = st.session_state['data_file']
if data_file is None:
    data_file = download_file_from_dropbox()
    st.session_state['data_file'] = data_file
if data_file:
    data = load_data(data_file)
    # Publish the loaded frame in the session state.
    st.session_state["data"] = data

    # Show the initial (pre-cleaning) dataset.
    st.header("Previsualisation des donnees pre-nettoyage")

    n_rows = data.shape[0]

    # Filter form: choose the columns and the row range to preview.
    form_1 = st.form("filtre")
    with form_1:
        list_colonne = st.multiselect("Choisir les colonnes a appliquer", options=list(data.columns))
        if n_rows > 0:
            list_ligne = st.select_slider(
                "Choisir les lignes a afficher",
                options=list(range(n_rows)),
                value=(0, n_rows - 1),
            )
        else:
            # A range slider cannot be built without options; with no rows
            # there is nothing to filter, so the row filter is disabled.
            list_ligne = ()

        col3, col4 = st.columns([4, 1])
        with col3:
            btn_3 = st.form_submit_button("Appliquer", use_container_width=True)
            if btn_3 and (list_colonne or list_ligne):
                # Start from the selected columns (or every column), then
                # restrict to the inclusive row range picked on the slider.
                data_ = data.loc[:, list(list_colonne)] if list_colonne else data
                if list_ligne:
                    data_ = data_.iloc[list_ligne[0]:list_ligne[1] + 1]
        with col4:
            btn_4 = st.form_submit_button("Reinitialiser", use_container_width=True)
            if btn_4:
                # NOTE(review): this only clears the local selections; the
                # full dataset is shown because data_ is left untouched.
                list_colonne.clear()
                list_ligne = ()

    # Preview: the filtered view when a filter was applied, else everything.
    expander_1 = st.expander("Previsualisation des donnees pre-nettoyage", expanded=True)
    with expander_1:
        st.dataframe(data if data_ is None else data_)

    st.write("___")

    # Ask once whether automatic cleaning should run; the answer is kept in
    # the session state (None = not answered yet).
    if st.session_state['auto_clean_state'] is None:
        form_2 = st.form(key="form_1")
        with form_2:
            form_2.subheader("Souhaiter vous nettoyer automatiquement les donnees ?")
            col1, col2 = st.columns([1, 1])
            with col1:
                btn_1 = st.form_submit_button("Accepter", use_container_width=True)
                if btn_1:
                    st.session_state['auto_clean_state'] = True
                    st.rerun()
            with col2:
                btn_2 = st.form_submit_button("Refuser", use_container_width=True)
                if btn_2:
                    st.session_state['auto_clean_state'] = False
                    st.rerun()
        st.write("___")

    if st.session_state['auto_clean_state'] is True:
        # Data cleaning
        st.header("Nettoyage des donnees")
        expander_2 = st.expander("Liste des operations")
        with expander_2:
            st.subheader("Retrait des colonnes vides")
            st.write("Liste des colonnes vides supprimer")
            colonnes_supprimees = empty_columns_cleaner(data)
            st.write(colonnes_supprimees)
            st.subheader("Retrait des colonnes mono-value")
            st.write("Liste des colonnes mono-value")
            colonnes_supprimees = remove_one_columns_values(data)
            st.write(colonnes_supprimees)
        # Both cleaners modify `data` in place; publish the cleaned frame.
        st.session_state["data"] = data
        st.write("___")
else:
    st.warning("Veuillez selectionner le fichier a traiter")