Spaces:
Sleeping
Sleeping
COULIBALY Bourahima
commited on
Commit
·
2c49a88
1
Parent(s):
38b4487
update
Browse files- .vscode/settings.json +3 -0
- App/class_input_box/input_box.py +60 -28
- App/functions_rupture/functions_gestion.py +355 -147
- App/utils/divers_function.py +175 -77
- App/utils/filter_dataframe.py +19 -7
- App/utils/priorite_pays.py +51 -7
- App/utils/standadisation.py +124 -16
- app.py +7 -9
- pages/🤖_Gestion_de_rupture_famille.py +233 -141
- pages/🦾_Gestion_de_rupture_sous_famille.py +251 -162
.vscode/settings.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"DockerRun.DisableAutoGenerateConfig": true
|
| 3 |
+
}
|
App/class_input_box/input_box.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
import streamlit as st
|
| 3 |
-
from typing import Dict
|
| 4 |
|
| 5 |
|
| 6 |
class InputsBox:
|
|
@@ -9,49 +8,82 @@ class InputsBox:
|
|
| 9 |
self.columns = None
|
| 10 |
self.product_id = None
|
| 11 |
self.class_id = None
|
| 12 |
-
|
| 13 |
-
def get_data(self)
|
| 14 |
-
uploaded_file = st.file_uploader(
|
| 15 |
-
|
|
|
|
|
|
|
| 16 |
if uploaded_file is not None:
|
| 17 |
-
#try :
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
-
# self.data = pd.read_csv(uploaded_file,dtype=str, sep=";", encoding="utf-8" )
|
| 22 |
-
|
| 23 |
-
self.columns = self.data.columns.tolist()
|
| 24 |
return self.data
|
| 25 |
|
| 26 |
def valid_produict_id(self) -> int:
|
| 27 |
-
min_len = st.number_input(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
return min_len
|
| 29 |
-
|
| 30 |
def valid_class_id(self) -> str:
|
| 31 |
-
valid = st.text_input(
|
|
|
|
|
|
|
| 32 |
return valid.split(";")
|
| 33 |
|
| 34 |
def get_product_id(self) -> str:
|
| 35 |
-
self.product_id = st.selectbox(
|
|
|
|
|
|
|
| 36 |
return self.product_id
|
| 37 |
|
| 38 |
-
|
| 39 |
def get_class_id(self) -> str:
|
| 40 |
-
self.class_id = st.selectbox(
|
|
|
|
|
|
|
| 41 |
return self.class_id
|
| 42 |
-
|
| 43 |
def get_countries(self) -> list:
|
| 44 |
-
countries = st.multiselect(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
return countries
|
| 46 |
-
|
| 47 |
-
def get_number_countries(self) -> int
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def get_proportion(self) -> float:
|
| 52 |
-
proportion = st.number_input(
|
|
|
|
|
|
|
|
|
|
| 53 |
return proportion
|
| 54 |
-
|
| 55 |
def show_proportion(self) -> bool:
|
| 56 |
-
show_condition = st.checkbox(
|
| 57 |
-
|
|
|
|
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import streamlit as st
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
class InputsBox:
|
|
|
|
| 8 |
self.columns = None
|
| 9 |
self.product_id = None
|
| 10 |
self.class_id = None
|
| 11 |
+
|
| 12 |
+
def get_data(self):
|
| 13 |
+
uploaded_file = st.file_uploader(
|
| 14 |
+
"Choose a CSV file with the separator ';' ", type=["csv"]
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
if uploaded_file is not None:
|
| 18 |
+
# try :
|
| 19 |
+
self.data = pd.read_csv(
|
| 20 |
+
uploaded_file, dtype=str, sep=";", encoding="latin-1"
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
# except :
|
| 24 |
+
# self.data = pd.read_csv(
|
| 25 |
+
# uploaded_file,dtype=str,
|
| 26 |
+
# sep=";",
|
| 27 |
+
# encoding="utf-8" )
|
| 28 |
|
| 29 |
+
self.columns = self.data.columns.tolist()
|
|
|
|
|
|
|
|
|
|
| 30 |
return self.data
|
| 31 |
|
| 32 |
def valid_produict_id(self) -> int:
|
| 33 |
+
min_len = st.number_input(
|
| 34 |
+
"Minimum len of product_id",
|
| 35 |
+
max_value=25,
|
| 36 |
+
min_value=1,
|
| 37 |
+
value=2,
|
| 38 |
+
key="pp"
|
| 39 |
+
)
|
| 40 |
return min_len
|
| 41 |
+
|
| 42 |
def valid_class_id(self) -> str:
|
| 43 |
+
valid = st.text_input(
|
| 44 |
+
"First element of No valid class_id separed by ;"
|
| 45 |
+
)
|
| 46 |
return valid.split(";")
|
| 47 |
|
| 48 |
def get_product_id(self) -> str:
|
| 49 |
+
self.product_id = st.selectbox(
|
| 50 |
+
"product_id (BARCODE)", options=self.columns, key="product_id"
|
| 51 |
+
)
|
| 52 |
return self.product_id
|
| 53 |
|
|
|
|
| 54 |
def get_class_id(self) -> str:
|
| 55 |
+
self.class_id = st.selectbox(
|
| 56 |
+
"class_id (WW_CLASS_KEY)", options=self.columns, key="class_id"
|
| 57 |
+
)
|
| 58 |
return self.class_id
|
| 59 |
+
|
| 60 |
def get_countries(self) -> list:
|
| 61 |
+
countries = st.multiselect(
|
| 62 |
+
"Select countries : ",
|
| 63 |
+
tuple(self.data.COUNTRY_KEY.unique()),
|
| 64 |
+
key="countries",
|
| 65 |
+
)
|
| 66 |
return countries
|
| 67 |
+
|
| 68 |
+
def get_number_countries(self) -> int:
|
| 69 |
+
nb_countries = st.number_input(
|
| 70 |
+
"Number of countries",
|
| 71 |
+
min_value=1,
|
| 72 |
+
max_value=20,
|
| 73 |
+
value=1,
|
| 74 |
+
key="Number of countries",
|
| 75 |
+
)
|
| 76 |
+
return nb_countries
|
| 77 |
+
|
| 78 |
def get_proportion(self) -> float:
|
| 79 |
+
proportion = st.number_input(
|
| 80 |
+
"Proportion",
|
| 81 |
+
min_value=0.10, max_value=1.00, value=0.75, key="proportion"
|
| 82 |
+
)
|
| 83 |
return proportion
|
| 84 |
+
|
| 85 |
def show_proportion(self) -> bool:
|
| 86 |
+
show_condition = st.checkbox(
|
| 87 |
+
"Show data with ratios ", value=True, key="show_ratio_checkbox"
|
| 88 |
+
)
|
| 89 |
+
return show_condition
|
App/functions_rupture/functions_gestion.py
CHANGED
|
@@ -1,174 +1,382 @@
|
|
| 1 |
import numpy as np
|
| 2 |
import pandas as pd
|
| 3 |
-
import streamlit as st
|
| 4 |
-
from App.utils.priorite_pays import
|
|
|
|
| 5 |
import nltk
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
pass
|
| 16 |
-
return
|
| 17 |
-
|
| 18 |
|
| 19 |
-
def calcul_total_par_ligne(data, produit_id, class_id):
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
@st.cache_data
|
| 35 |
-
def
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
non_zero = stacked[stacked != 0]
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
| 42 |
return sparse_matrix
|
| 43 |
|
| 44 |
|
| 45 |
@st.cache_data
|
| 46 |
-
def
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
return Country[produit_id, class_id]
|
| 68 |
|
| 69 |
|
| 70 |
-
def
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
try:
|
| 73 |
-
merged["Countries"] = merged.apply(
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
except Exception as e:
|
| 77 |
-
st.warning(f"
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
def
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
-
|
| 99 |
-
df_equa = df[duplicated_subclass]
|
| 100 |
-
df_equa = df_equa[(df_equa.Proportion == 0.5)]
|
| 101 |
-
|
| 102 |
-
df_nequa = df[~df.isin(df_equa)].dropna()
|
| 103 |
-
|
| 104 |
-
return df, df_equa, df_nequa
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
def finale_merge(data, new_data, produit_id, class_id):
|
| 108 |
-
|
| 109 |
-
merged_df = pd.merge(data, new_data, on=["COUNTRY_KEY", produit_id], how="left", suffixes=("", "_y"))
|
| 110 |
-
merged_df[class_id] = merged_df[f"{class_id}_y"].fillna(merged_df[class_id])
|
| 111 |
-
merged_df[f"{class_id[:-4]}_DESC_FR"] = merged_df[f"{class_id[:-4]}_DESC_FR_y"].fillna(merged_df[f"{class_id[:-4]}_DESC_FR"])
|
| 112 |
-
|
| 113 |
-
df_finale = merged_df[[produit_id, "COUNTRY_KEY",class_id, f"{class_id[:-4]}_DESC_FR"]]
|
| 114 |
-
|
| 115 |
-
merged = pd.merge(data, df_finale, how='outer', indicator=True)
|
| 116 |
-
|
| 117 |
-
data_finale = merged[merged['_merge'] != 'both']
|
| 118 |
-
|
| 119 |
-
data_finale = data_finale.rename(columns={'_merge': 'Changements'})
|
| 120 |
-
|
| 121 |
-
data_finale.sort_values(by =[produit_id], ascending=True, inplace =True)
|
| 122 |
-
|
| 123 |
-
data_finale["Changements"] = data_finale["Changements"].apply(lambda x : "Avant" if x == "left_only" else "Après")
|
| 124 |
-
|
| 125 |
-
data_finale = data_finale[[produit_id, "COUNTRY_KEY" , class_id, f"{class_id[:-4]}_DESC_FR", "Changements"]]
|
| 126 |
-
data_finale.drop_duplicates(inplace=True)
|
| 127 |
-
|
| 128 |
-
return data_finale, df_finale
|
| 129 |
-
|
| 130 |
-
# brouillon
|
| 131 |
-
|
| 132 |
-
def data_1_1(df_nequa, produit_id, class_id):
|
| 133 |
-
df_nequa_2 = df_nequa[(df_nequa.Countries.apply(lambda x: len(x) > 1))]
|
| 134 |
-
max_poids_index = df_nequa_2.groupby(produit_id)['Poids'].idxmax()
|
| 135 |
-
|
| 136 |
-
# Updating columns for all rows instead of iterating over unique barcodes
|
| 137 |
-
df_nequa_2.loc[:, class_id] = df_nequa_2.loc[max_poids_index, class_id].values
|
| 138 |
-
df_nequa_2.loc[:, f'{class_id[:-4]}_DESC_FR'] = df_nequa_2.loc[max_poids_index, f'{class_id[:-4]}_DESC_FR'].values
|
| 139 |
|
| 140 |
-
df_duplicate = df_nequa_2.copy()
|
| 141 |
-
df_duplicate.Countries = df_duplicate.Countries.apply(lambda x : ','.join(x))
|
| 142 |
-
new_rows = []
|
| 143 |
-
for _, row in df_duplicate.iterrows():
|
| 144 |
-
countries = row['Countries'].split(',')
|
| 145 |
-
for country in countries:
|
| 146 |
-
new_row = row.copy()
|
| 147 |
-
new_row['Countries'] = country
|
| 148 |
-
new_rows.append(new_row)
|
| 149 |
-
|
| 150 |
-
new_df = pd.DataFrame(new_rows).drop_duplicates()
|
| 151 |
-
new_df = new_df.rename(columns={'Countries': 'Country'}, errors='ignore')
|
| 152 |
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
| 159 |
|
| 160 |
-
barcodes = df_f_f[produit_id].unique()
|
| 161 |
-
max = 0
|
| 162 |
-
Barcodes = []
|
| 163 |
for barcode in barcodes:
|
| 164 |
-
items =
|
| 165 |
if len(items) == 2:
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
import pandas as pd
|
| 3 |
+
import streamlit as st
|
| 4 |
+
from App.utils.priorite_pays import dico
|
| 5 |
+
# from App.utils.divers_function import data_cleaning_func
|
| 6 |
import nltk
|
| 7 |
+
from typing import Tuple, List
|
| 8 |
+
|
| 9 |
+
nltk.download("stopwords")
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def filter_data_with_valid_keys(
|
| 13 |
+
data: pd.DataFrame,
|
| 14 |
+
product_id_col: str,
|
| 15 |
+
class_id_col: str,
|
| 16 |
+
min_product_id_length: int,
|
| 17 |
+
valid_class_id_prefixes: List[str],
|
| 18 |
+
) -> pd.DataFrame:
|
| 19 |
+
"""
|
| 20 |
+
Filter the dataframe based on product ID length and class ID prefixes.
|
| 21 |
+
|
| 22 |
+
Args:
|
| 23 |
+
data (pd.DataFrame): Input dataframe
|
| 24 |
+
product_id_col (str): Name of the product ID column
|
| 25 |
+
class_id_col (str): Name of the class ID column
|
| 26 |
+
min_product_id_length (int): Minimum length for product IDs
|
| 27 |
+
valid_class_id_prefixes (List[str]): List of valid prefixes for class IDs
|
| 28 |
+
|
| 29 |
+
Returns:
|
| 30 |
+
pd.DataFrame: Filtered dataframe
|
| 31 |
+
"""
|
| 32 |
+
filtered_data = data[data[product_id_col].str.len() > min_product_id_length]
|
| 33 |
+
try:
|
| 34 |
+
filtered_data = filtered_data[
|
| 35 |
+
~filtered_data[class_id_col].str[0].isin(valid_class_id_prefixes)
|
| 36 |
+
]
|
| 37 |
+
except Exception:
|
| 38 |
pass
|
| 39 |
+
return filtered_data
|
|
|
|
| 40 |
|
|
|
|
| 41 |
|
| 42 |
+
@st.cache_data
|
| 43 |
+
def calculate_product_class_matrix(
|
| 44 |
+
data: pd.DataFrame, product_id_col: str, class_id_col: str
|
| 45 |
+
) -> Tuple[pd.DataFrame, pd.DataFrame]:
|
| 46 |
+
"""
|
| 47 |
+
Calculate the product-class matrix and total counts per product.
|
| 48 |
+
|
| 49 |
+
Args:
|
| 50 |
+
data (pd.DataFrame): Input dataframe
|
| 51 |
+
product_id_col (str): Name of the product ID column
|
| 52 |
+
class_id_col (str): Name of the class ID column
|
| 53 |
+
|
| 54 |
+
Returns:
|
| 55 |
+
Tuple[pd.DataFrame, pd.DataFrame]: Total counts per product and product-class matrix
|
| 56 |
+
"""
|
| 57 |
+
matrix = pd.crosstab(data[product_id_col], data[class_id_col])
|
| 58 |
+
total_by_product = matrix.sum(axis=1)
|
| 59 |
+
|
| 60 |
+
products_with_multiple_classes = total_by_product[total_by_product > 1].index
|
| 61 |
+
filtered_data = data[data[product_id_col].isin(products_with_multiple_classes)]
|
| 62 |
+
matrix = pd.crosstab(filtered_data[product_id_col], filtered_data[class_id_col])
|
| 63 |
+
|
| 64 |
+
total_by_product = matrix.sum(axis=1)
|
| 65 |
+
total_by_product_df = pd.DataFrame(
|
| 66 |
+
{
|
| 67 |
+
product_id_col: total_by_product.index,
|
| 68 |
+
"total_by_product": total_by_product.values,
|
| 69 |
+
}
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
return total_by_product_df, matrix
|
| 73 |
|
| 74 |
|
| 75 |
@st.cache_data
|
| 76 |
+
def create_sparse_matrix(
|
| 77 |
+
matrix: pd.DataFrame, product_id_col: str, class_id_col: str
|
| 78 |
+
) -> pd.DataFrame:
|
| 79 |
+
"""
|
| 80 |
+
Create a sparse matrix representation from the product-class matrix.
|
| 81 |
+
|
| 82 |
+
Args:
|
| 83 |
+
matrix (pd.DataFrame): Product-class matrix
|
| 84 |
+
product_id_col (str): Name of the product ID column
|
| 85 |
+
class_id_col (str): Name of the class ID column
|
| 86 |
+
|
| 87 |
+
Returns:
|
| 88 |
+
pd.DataFrame: Sparse matrix representation
|
| 89 |
+
"""
|
| 90 |
+
stacked = matrix.stack()
|
| 91 |
non_zero = stacked[stacked != 0]
|
| 92 |
+
sparse_matrix = pd.DataFrame(
|
| 93 |
+
{
|
| 94 |
+
product_id_col: non_zero.index.get_level_values(0).astype(str),
|
| 95 |
+
class_id_col: non_zero.index.get_level_values(1).astype(str),
|
| 96 |
+
"count": non_zero.values,
|
| 97 |
+
}
|
| 98 |
+
)
|
| 99 |
return sparse_matrix
|
| 100 |
|
| 101 |
|
| 102 |
@st.cache_data
|
| 103 |
+
def process_new_data(
|
| 104 |
+
data: pd.DataFrame, product_id_col: str, class_id_col: str
|
| 105 |
+
) -> Tuple[pd.Series, pd.DataFrame]:
|
| 106 |
+
"""
|
| 107 |
+
Process the data to create a new dataset with country groups and merged information.
|
| 108 |
+
|
| 109 |
+
Args:
|
| 110 |
+
data (pd.DataFrame): Input dataframe
|
| 111 |
+
product_id_col (str): Name of the product ID column
|
| 112 |
+
class_id_col (str): Name of the class ID column
|
| 113 |
+
|
| 114 |
+
Returns:
|
| 115 |
+
Tuple[pd.Series, pd.DataFrame]: Country groups and merged dataframe
|
| 116 |
+
"""
|
| 117 |
+
total_by_product_df, matrix = calculate_product_class_matrix(
|
| 118 |
+
data, product_id_col, class_id_col
|
| 119 |
+
)
|
| 120 |
+
sparse_matrix = create_sparse_matrix(matrix, product_id_col, class_id_col)
|
| 121 |
+
merged_data = pd.merge(sparse_matrix, total_by_product_df, on=[product_id_col])
|
| 122 |
+
merged_data["Proportion"] = merged_data["count"] / merged_data["total_by_product"]
|
| 123 |
+
final_merged = merged_data.merge(
|
| 124 |
+
data,
|
| 125 |
+
left_on=[class_id_col, product_id_col],
|
| 126 |
+
right_on=[class_id_col, product_id_col],
|
| 127 |
+
)
|
| 128 |
+
try:
|
| 129 |
+
country_groups = final_merged.groupby([class_id_col, product_id_col])[
|
| 130 |
+
"Country"
|
| 131 |
+
].agg(lambda x: x.tolist())
|
| 132 |
+
except KeyError:
|
| 133 |
+
try:
|
| 134 |
+
country_groups = final_merged.groupby([class_id_col, product_id_col])[
|
| 135 |
+
"COUNTRY_KEY"
|
| 136 |
+
].agg(lambda x: x.tolist())
|
| 137 |
+
except KeyError:
|
| 138 |
+
country_groups = final_merged.groupby([class_id_col, product_id_col])[
|
| 139 |
+
"COUNTRY"
|
| 140 |
+
].agg(lambda x: x.tolist())
|
| 141 |
+
return country_groups, final_merged
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def add_country(produit_id: str, class_id: str, Country) -> List[str]:
|
| 145 |
+
"""
|
| 146 |
+
Retrieve the list of countries for a given product ID and class ID.
|
| 147 |
+
|
| 148 |
+
Args:
|
| 149 |
+
product_id (str): The product ID
|
| 150 |
+
class_id (str): The class ID
|
| 151 |
+
country_groups (pd.Series): Series containing country groups
|
| 152 |
+
|
| 153 |
+
Returns:
|
| 154 |
+
List[str]: List of countries for the given product and class
|
| 155 |
+
"""
|
| 156 |
return Country[produit_id, class_id]
|
| 157 |
|
| 158 |
|
| 159 |
+
def finalize_merged_data(
|
| 160 |
+
merged: pd.DataFrame,
|
| 161 |
+
country_groups: pd.Series,
|
| 162 |
+
product_id_col: str,
|
| 163 |
+
class_id_col: str,
|
| 164 |
+
) -> pd.DataFrame:
|
| 165 |
+
"""
|
| 166 |
+
Finalize the merged data by adding country information and removing duplicates.
|
| 167 |
+
|
| 168 |
+
Args:
|
| 169 |
+
merged (pd.DataFrame): Merged dataframe
|
| 170 |
+
country_groups (pd.Series): Series containing country groups
|
| 171 |
+
product_id_col (str): Name of the product ID column
|
| 172 |
+
class_id_col (str): Name of the class ID column
|
| 173 |
+
|
| 174 |
+
Returns:
|
| 175 |
+
pd.DataFrame: Finalized merged dataframe
|
| 176 |
+
"""
|
| 177 |
try:
|
| 178 |
+
merged["Countries"] = merged.apply(
|
| 179 |
+
lambda row: add_country(
|
| 180 |
+
row[1], row[0], country_groups
|
| 181 |
+
),
|
| 182 |
+
axis=1,
|
| 183 |
+
)
|
| 184 |
+
merged["Countries"] = merged["Countries"].apply(tuple)
|
| 185 |
+
final_merged = merged.drop_duplicates(
|
| 186 |
+
subset=[product_id_col, class_id_col, "Countries"]
|
| 187 |
+
)
|
| 188 |
except Exception as e:
|
| 189 |
+
st.warning(f"An error occurred: {e}")
|
| 190 |
+
final_merged = None
|
| 191 |
+
return final_merged
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
def filter_by_country_and_proportion(
|
| 195 |
+
merged_data: pd.DataFrame,
|
| 196 |
+
min_countries: int,
|
| 197 |
+
min_proportion: float,
|
| 198 |
+
product_id_col: str,
|
| 199 |
+
) -> pd.DataFrame:
|
| 200 |
+
"""
|
| 201 |
+
Filter the merged data based on minimum number of countries and proportion.
|
| 202 |
+
|
| 203 |
+
Args:
|
| 204 |
+
merged_data (pd.DataFrame): Merged dataframe
|
| 205 |
+
min_countries (int): Minimum number of countries required
|
| 206 |
+
min_proportion (float): Minimum proportion required
|
| 207 |
+
product_id_col (str): Name of the product ID column
|
| 208 |
+
|
| 209 |
+
Returns:
|
| 210 |
+
pd.DataFrame: Filtered dataframe
|
| 211 |
+
"""
|
| 212 |
+
filtered_data = merged_data[
|
| 213 |
+
(merged_data.Proportion >= min_proportion)
|
| 214 |
+
& (merged_data.total_by_product >= min_countries)
|
| 215 |
+
]
|
| 216 |
+
product_keys = filtered_data[product_id_col].unique()
|
| 217 |
+
result_df = merged_data[merged_data[product_id_col].isin(product_keys)]
|
| 218 |
+
return result_df
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def process_country_priority(
|
| 222 |
+
merged_data: pd.DataFrame, product_id_col: str
|
| 223 |
+
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
|
| 224 |
+
"""
|
| 225 |
+
Process the merged data based on country priority.
|
| 226 |
+
|
| 227 |
+
Args:
|
| 228 |
+
merged_data (pd.DataFrame): Merged dataframe
|
| 229 |
+
product_id_col (str): Name of the product ID column
|
| 230 |
+
|
| 231 |
+
Returns:
|
| 232 |
+
Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: Processed dataframes (all, equal weight, non-equal weight)
|
| 233 |
+
"""
|
| 234 |
+
data = merged_data[
|
| 235 |
+
(merged_data.Proportion == 0.5) & (merged_data.total_by_product >= 2)
|
| 236 |
+
]
|
| 237 |
+
product_keys = data[product_id_col].unique()
|
| 238 |
+
df = merged_data[merged_data[product_id_col].isin(product_keys)]
|
| 239 |
+
|
| 240 |
+
df["Weight"] = df["Countries"].apply(lambda x: sum(dico[y] for y in x))
|
| 241 |
+
|
| 242 |
+
duplicated_subclass = df.duplicated(subset=[product_id_col, "Weight"], keep=False)
|
| 243 |
+
df_equal = df[duplicated_subclass & (df.Proportion == 0.5)]
|
| 244 |
+
df_not_equal = df[~df.isin(df_equal)].dropna()
|
| 245 |
+
|
| 246 |
+
return df, df_equal, df_not_equal
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def merge_final_data(
|
| 250 |
+
original_data: pd.DataFrame,
|
| 251 |
+
new_data: pd.DataFrame,
|
| 252 |
+
product_id_col: str,
|
| 253 |
+
class_id_col: str,
|
| 254 |
+
) -> Tuple[pd.DataFrame, pd.DataFrame]:
|
| 255 |
+
"""
|
| 256 |
+
Merge the original data with the new processed data.
|
| 257 |
+
|
| 258 |
+
Args:
|
| 259 |
+
original_data (pd.DataFrame): Original dataframe
|
| 260 |
+
new_data (pd.DataFrame): New processed dataframe
|
| 261 |
+
product_id_col (str): Name of the product ID column
|
| 262 |
+
class_id_col (str): Name of the class ID column
|
| 263 |
+
|
| 264 |
+
Returns:
|
| 265 |
+
Tuple[pd.DataFrame, pd.DataFrame]: Final merged data and changes summary
|
| 266 |
+
"""
|
| 267 |
+
merged_df = pd.merge(
|
| 268 |
+
original_data,
|
| 269 |
+
new_data,
|
| 270 |
+
on=["COUNTRY_KEY", product_id_col],
|
| 271 |
+
how="left",
|
| 272 |
+
suffixes=("", "_y"),
|
| 273 |
+
)
|
| 274 |
+
merged_df[class_id_col] = merged_df[f"{class_id_col}_y"].fillna(
|
| 275 |
+
merged_df[class_id_col]
|
| 276 |
+
)
|
| 277 |
+
merged_df[f"{class_id_col[:-4]}_DESC_FR"] = merged_df[
|
| 278 |
+
f"{class_id_col[:-4]}_DESC_FR_y"
|
| 279 |
+
].fillna(merged_df[f"{class_id_col[:-4]}_DESC_FR"])
|
| 280 |
+
|
| 281 |
+
df_final = merged_df[
|
| 282 |
+
[product_id_col, "COUNTRY_KEY", class_id_col, f"{class_id_col[:-4]}_DESC_FR"]
|
| 283 |
+
]
|
| 284 |
+
|
| 285 |
+
merged = pd.merge(original_data, df_final, how="outer", indicator=True)
|
| 286 |
+
data_final = merged[merged["_merge"] != "both"]
|
| 287 |
+
data_final = data_final.rename(columns={"_merge": "Changes"})
|
| 288 |
+
data_final.sort_values(by=[product_id_col], ascending=True, inplace=True)
|
| 289 |
+
data_final["Changes"] = data_final["Changes"].apply(
|
| 290 |
+
lambda x: "Before" if x == "left_only" else "After"
|
| 291 |
+
)
|
| 292 |
+
data_final = data_final[
|
| 293 |
+
[
|
| 294 |
+
product_id_col,
|
| 295 |
+
"COUNTRY_KEY",
|
| 296 |
+
class_id_col,
|
| 297 |
+
f"{class_id_col[:-4]}_DESC_FR",
|
| 298 |
+
"Changes",
|
| 299 |
+
]
|
| 300 |
+
]
|
| 301 |
+
data_final.drop_duplicates(inplace=True)
|
| 302 |
+
|
| 303 |
+
return data_final, df_final
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def process_non_equal_data(
|
| 307 |
+
df_not_equal: pd.DataFrame, product_id_col: str, class_id_col: str
|
| 308 |
+
) -> pd.DataFrame:
|
| 309 |
+
"""
|
| 310 |
+
Process data with non-equal weights, selecting the classification with the highest weight.
|
| 311 |
+
|
| 312 |
+
Args:
|
| 313 |
+
df_not_equal (pd.DataFrame): Dataframe with non-equal weights
|
| 314 |
+
product_id_col (str): Name of the product ID column
|
| 315 |
+
class_id_col (str): Name of the class ID column
|
| 316 |
+
|
| 317 |
+
Returns:
|
| 318 |
+
pd.DataFrame: Processed dataframe with selected classifications
|
| 319 |
+
"""
|
| 320 |
+
df_multi_country = df_not_equal[df_not_equal.Countries.apply(len) > 1]
|
| 321 |
+
max_weight_index = df_multi_country.groupby(product_id_col)["Weight"].idxmax()
|
| 322 |
+
|
| 323 |
+
df_multi_country.loc[:, [class_id_col, f"{class_id_col[:-4]}_DESC_FR"]] = (
|
| 324 |
+
df_multi_country.loc[
|
| 325 |
+
max_weight_index, [class_id_col, f"{class_id_col[:-4]}_DESC_FR"]
|
| 326 |
+
].values
|
| 327 |
+
)
|
| 328 |
+
|
| 329 |
+
df_duplicate = df_multi_country.copy()
|
| 330 |
+
df_duplicate.Countries = df_duplicate.Countries.str.join(",")
|
| 331 |
+
|
| 332 |
+
new_df = (
|
| 333 |
+
df_duplicate.explode("Countries")
|
| 334 |
+
.rename(columns={"Countries": "Country"})
|
| 335 |
+
.drop_duplicates()
|
| 336 |
+
)
|
| 337 |
|
| 338 |
+
return new_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
|
| 341 |
+
def process_france_data(
|
| 342 |
+
df: pd.DataFrame, product_id_col: str, class_id_col: str
|
| 343 |
+
) -> pd.DataFrame:
|
| 344 |
+
"""
|
| 345 |
+
Process data specific to France, handling special cases for item keys.
|
| 346 |
|
| 347 |
+
Args:
|
| 348 |
+
df (pd.DataFrame): Input dataframe
|
| 349 |
+
product_id_col (str): Name of the product ID column
|
| 350 |
+
class_id_col (str): Name of the class ID column
|
| 351 |
|
| 352 |
+
Returns:
|
| 353 |
+
pd.DataFrame: Processed dataframe for France
|
| 354 |
+
"""
|
| 355 |
+
df_france = df[df.Country == "FRA"]
|
| 356 |
+
barcodes = df_france[product_id_col].unique()
|
| 357 |
|
|
|
|
|
|
|
|
|
|
| 358 |
for barcode in barcodes:
|
| 359 |
+
items = df_france.item_key[df_france[product_id_col] == barcode].tolist()
|
| 360 |
if len(items) == 2:
|
| 361 |
+
if "R" in items[0]:
|
| 362 |
+
df_france.loc[
|
| 363 |
+
(df_france[product_id_col] == barcode)
|
| 364 |
+
& (df_france.item_key == items[0]),
|
| 365 |
+
[class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
|
| 366 |
+
] = df_france.loc[
|
| 367 |
+
(df_france[product_id_col] == barcode)
|
| 368 |
+
& (df_france.item_key == items[1]),
|
| 369 |
+
[class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
|
| 370 |
+
].values
|
| 371 |
+
elif "R" in items[1]:
|
| 372 |
+
df_france.loc[
|
| 373 |
+
(df_france[product_id_col] == barcode)
|
| 374 |
+
& (df_france.item_key == items[1]),
|
| 375 |
+
[class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
|
| 376 |
+
] = df_france.loc[
|
| 377 |
+
(df_france[product_id_col] == barcode)
|
| 378 |
+
& (df_france.item_key == items[0]),
|
| 379 |
+
[class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
|
| 380 |
+
].values
|
| 381 |
+
|
| 382 |
+
return df_france
|
App/utils/divers_function.py
CHANGED
|
@@ -3,7 +3,7 @@ import pandas as pd
|
|
| 3 |
import re
|
| 4 |
from sklearn.feature_extraction.text import CountVectorizer
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
-
|
| 7 |
from App.utils.standadisation import *
|
| 8 |
from nltk.corpus import stopwords
|
| 9 |
from nltk.stem import PorterStemmer
|
|
@@ -13,82 +13,123 @@ from nltk.corpus import stopwords
|
|
| 13 |
|
| 14 |
@st.cache_data
|
| 15 |
def convert_df(df):
|
| 16 |
-
return df.to_csv().encode(
|
|
|
|
| 17 |
|
| 18 |
@st.cache_data
|
| 19 |
def supprime_country(df):
|
| 20 |
-
try
|
| 21 |
-
|
| 22 |
-
except
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
return df
|
| 31 |
|
| 32 |
|
| 33 |
-
def
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
return finale_df
|
| 47 |
|
|
|
|
|
|
|
| 48 |
|
|
|
|
| 49 |
|
| 50 |
-
|
|
|
|
| 51 |
|
| 52 |
strings = strings.lower().strip()
|
| 53 |
-
strings = strings.replace('
|
| 54 |
-
strings = strings.replace(
|
| 55 |
-
strings = re.sub(r
|
| 56 |
-
text_normalized = re.sub(
|
| 57 |
|
| 58 |
return text_normalized
|
| 59 |
|
| 60 |
|
| 61 |
-
def
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
|
| 68 |
|
| 69 |
-
def
|
| 70 |
liste_stopword_unicode = [str(item) for item in liste_stopword]
|
| 71 |
-
en_stops = set(stopwords.words(
|
| 72 |
-
fr_stops = set(stopwords.words(
|
| 73 |
|
| 74 |
-
list_DESCRIPTION = strings.split(
|
| 75 |
cleaned_list = []
|
| 76 |
|
| 77 |
for ingredient in list_DESCRIPTION:
|
| 78 |
-
temp = ingredient.split(
|
| 79 |
-
cleaned_ingredient =
|
|
|
|
|
|
|
| 80 |
cleaned_list.append(cleaned_ingredient)
|
| 81 |
|
| 82 |
-
strings =
|
| 83 |
-
list_DESCRIPTION = strings.split(
|
| 84 |
cleaned_list = []
|
| 85 |
|
| 86 |
for ingredient in list_DESCRIPTION:
|
| 87 |
-
temp = ingredient.split(
|
| 88 |
-
cleaned_ingredient =
|
|
|
|
|
|
|
| 89 |
cleaned_list.append(cleaned_ingredient)
|
| 90 |
|
| 91 |
-
strings =
|
| 92 |
return strings
|
| 93 |
|
| 94 |
|
|
@@ -97,27 +138,31 @@ fr_stemmer = FrenchStemmer()
|
|
| 97 |
|
| 98 |
|
| 99 |
def stem_sentence(sentence, stemmer):
|
| 100 |
-
words = sentence.split(
|
| 101 |
stemmed_words = [stemmer.stem(word) for word in words]
|
| 102 |
-
stemmed_sentence =
|
| 103 |
return stemmed_sentence
|
| 104 |
|
| 105 |
|
| 106 |
def english_stemmer(strings):
|
| 107 |
-
list_ingredients = strings.split(
|
| 108 |
-
stemmed_list = [
|
| 109 |
-
|
|
|
|
|
|
|
| 110 |
return strings
|
| 111 |
|
| 112 |
|
| 113 |
def french_stemmer(strings):
|
| 114 |
-
list_ingredients = strings.split(
|
| 115 |
-
stemmed_list = [
|
| 116 |
-
|
|
|
|
|
|
|
| 117 |
return strings
|
| 118 |
|
| 119 |
|
| 120 |
-
def
|
| 121 |
|
| 122 |
vectorizer = CountVectorizer()
|
| 123 |
vectors = vectorizer.fit_transform([expr1, expr2])
|
|
@@ -125,35 +170,88 @@ def cosine_similarity_between_expressions(expr1, expr2):
|
|
| 125 |
|
| 126 |
return similarity[0][0]
|
| 127 |
|
| 128 |
-
def ajout_simularite(data) :
|
| 129 |
-
data["ITEM_DESC_avant_clean"] = data["ITEM_DESC_x"].apply(data_cleaning)
|
| 130 |
-
data["ITEM_DESC_apres_clean"] = data["ITEM_DESC_y"].apply(data_cleaning)
|
| 131 |
-
|
| 132 |
-
stop = stopwords.words('french')
|
| 133 |
-
data['ITEM_DESC_avant_clean'] = data['ITEM_DESC_avant_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
|
| 134 |
-
data['ITEM_DESC_apres_clean'] = data['ITEM_DESC_apres_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
|
| 135 |
-
|
| 136 |
-
stop = stopwords.words('english')
|
| 137 |
-
data['ITEM_DESC_avant_clean'] = data['ITEM_DESC_avant_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
|
| 138 |
-
data['ITEM_DESC_apres_clean'] = data['ITEM_DESC_apres_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
-
|
| 144 |
-
data['ITEM_DESC_apres_clean'] = data['ITEM_DESC_apres_clean'].apply(standardization)
|
| 145 |
-
|
| 146 |
-
data["Cosinus similarité"] = data.apply(lambda row: cosine_similarity_between_expressions(row['ITEM_DESC_apres_clean'], row['ITEM_DESC_avant_clean']), axis=1)
|
| 147 |
-
|
| 148 |
-
return data
|
| 149 |
|
| 150 |
|
| 151 |
-
def display_data_with_download_button(
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
| 153 |
st.write("No result for the above criterion ")
|
| 154 |
-
else
|
| 155 |
st.subheader(title)
|
| 156 |
df.loc[:, "Evaluation"] = True
|
| 157 |
edited_df = st.data_editor(df)
|
| 158 |
csv_data = convert_df(edited_df)
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import re
|
| 4 |
from sklearn.feature_extraction.text import CountVectorizer
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
+
from typing import Callable
|
| 7 |
from App.utils.standadisation import *
|
| 8 |
from nltk.corpus import stopwords
|
| 9 |
from nltk.stem import PorterStemmer
|
|
|
|
| 13 |
|
| 14 |
@st.cache_data
|
| 15 |
def convert_df(df):
|
| 16 |
+
return df.to_csv().encode("utf-8")
|
| 17 |
+
|
| 18 |
|
| 19 |
@st.cache_data
|
| 20 |
def supprime_country(df):
|
| 21 |
+
try:
|
| 22 |
+
df.drop(["Country"], axis=1, inplace=True)
|
| 23 |
+
except:
|
| 24 |
+
try:
|
| 25 |
+
df.drop(["COUNTRY_KEY"], axis=1, inplace=True)
|
| 26 |
+
except:
|
| 27 |
+
try:
|
| 28 |
+
df.drop(["COUNTRY"], axis=1, inplace=True)
|
| 29 |
+
except:
|
| 30 |
+
pass
|
| 31 |
return df
|
| 32 |
|
| 33 |
|
| 34 |
+
def merge_and_update_classification(
|
| 35 |
+
main_df, update_df, product_id_col, classification_col
|
| 36 |
+
):
|
| 37 |
+
"""
|
| 38 |
+
Merge two DataFrames and update the classification based on the update_df.
|
| 39 |
+
Only rows where the classification has changed are retained.
|
| 40 |
+
|
| 41 |
+
Args:
|
| 42 |
+
main_df (pd.DataFrame): The main DataFrame containing original data.
|
| 43 |
+
update_df (pd.DataFrame): DataFrame containing updated classifications.
|
| 44 |
+
product_id_col (str): Name of the column used as the product identifier.
|
| 45 |
+
classification_col (str): Name of the classification column to be updated.
|
| 46 |
+
|
| 47 |
+
Returns:
|
| 48 |
+
pd.DataFrame: A DataFrame containing only the rows where classification was updated.
|
| 49 |
+
"""
|
| 50 |
+
# Get unique product IDs from the update DataFrame
|
| 51 |
+
update_product_ids = update_df[product_id_col].unique()
|
| 52 |
+
|
| 53 |
+
# Filter main DataFrame to include only products in the update DataFrame
|
| 54 |
+
filtered_main_df = main_df[main_df[product_id_col].isin(update_product_ids)]
|
| 55 |
+
|
| 56 |
+
# Preserve the original classification
|
| 57 |
+
original_classification_col = f"original_{classification_col}"
|
| 58 |
+
filtered_main_df[original_classification_col] = filtered_main_df[classification_col]
|
| 59 |
|
| 60 |
+
# Merge DataFrames
|
| 61 |
+
merged_df = pd.merge(
|
| 62 |
+
filtered_main_df,
|
| 63 |
+
update_df,
|
| 64 |
+
on=[product_id_col],
|
| 65 |
+
how="inner",
|
| 66 |
+
suffixes=("_main", "_update"),
|
| 67 |
+
indicator=True,
|
| 68 |
+
)
|
| 69 |
|
| 70 |
+
# Update classification, keeping original if update is NaN
|
| 71 |
+
merged_df[classification_col] = merged_df[f"{classification_col}_update"].fillna(
|
| 72 |
+
merged_df[original_classification_col]
|
| 73 |
+
)
|
| 74 |
|
| 75 |
+
# Keep only rows where classification has changed
|
| 76 |
+
updated_df = merged_df[
|
| 77 |
+
merged_df[f"{classification_col}_main"]
|
| 78 |
+
!= merged_df[f"{classification_col}_update"]
|
| 79 |
+
]
|
|
|
|
| 80 |
|
| 81 |
+
# Remove merge indicator column
|
| 82 |
+
final_df = updated_df.drop(columns=["_merge"])
|
| 83 |
|
| 84 |
+
return final_df
|
| 85 |
|
| 86 |
+
|
| 87 |
+
def data_cleaning_func(strings):
|
| 88 |
|
| 89 |
strings = strings.lower().strip()
|
| 90 |
+
strings = strings.replace("'", " ")
|
| 91 |
+
strings = strings.replace("/", " ")
|
| 92 |
+
strings = re.sub(r"[^\w\s]", " ", strings)
|
| 93 |
+
text_normalized = re.sub("[^A-Za-z ,éêèîôœàâ]+", " ", strings)
|
| 94 |
|
| 95 |
return text_normalized
|
| 96 |
|
| 97 |
|
| 98 |
+
def standardization_func(strings):
|
| 99 |
+
liste = strings.split(" ")
|
| 100 |
+
for i in range(len(liste)):
|
| 101 |
+
if liste[i] in dictionnaire.keys():
|
| 102 |
+
liste[i] = dictionnaire[liste[i]]
|
| 103 |
+
return " ".join(liste)
|
| 104 |
|
| 105 |
|
| 106 |
+
def remove_stop_words_func(strings):
|
| 107 |
liste_stopword_unicode = [str(item) for item in liste_stopword]
|
| 108 |
+
en_stops = set(stopwords.words("english") + liste_stopword_unicode)
|
| 109 |
+
fr_stops = set(stopwords.words("french") + liste_stopword_unicode)
|
| 110 |
|
| 111 |
+
list_DESCRIPTION = strings.split(" ")
|
| 112 |
cleaned_list = []
|
| 113 |
|
| 114 |
for ingredient in list_DESCRIPTION:
|
| 115 |
+
temp = ingredient.split(" ")
|
| 116 |
+
cleaned_ingredient = " ".join(
|
| 117 |
+
[word for word in temp if word.lower() not in en_stops]
|
| 118 |
+
)
|
| 119 |
cleaned_list.append(cleaned_ingredient)
|
| 120 |
|
| 121 |
+
strings = " ".join([ingredient for ingredient in cleaned_list])
|
| 122 |
+
list_DESCRIPTION = strings.split(" ")
|
| 123 |
cleaned_list = []
|
| 124 |
|
| 125 |
for ingredient in list_DESCRIPTION:
|
| 126 |
+
temp = ingredient.split(" ")
|
| 127 |
+
cleaned_ingredient = " ".join(
|
| 128 |
+
[word for word in temp if word.lower() not in fr_stops]
|
| 129 |
+
)
|
| 130 |
cleaned_list.append(cleaned_ingredient)
|
| 131 |
|
| 132 |
+
strings = " ".join([ingredient for ingredient in cleaned_list])
|
| 133 |
return strings
|
| 134 |
|
| 135 |
|
|
|
|
| 138 |
|
| 139 |
|
| 140 |
def stem_sentence(sentence, stemmer):
|
| 141 |
+
words = sentence.split(" ")
|
| 142 |
stemmed_words = [stemmer.stem(word) for word in words]
|
| 143 |
+
stemmed_sentence = " ".join(stemmed_words)
|
| 144 |
return stemmed_sentence
|
| 145 |
|
| 146 |
|
| 147 |
def english_stemmer(strings):
|
| 148 |
+
list_ingredients = strings.split(" ")
|
| 149 |
+
stemmed_list = [
|
| 150 |
+
stem_sentence(ingredient, en_stemmer) for ingredient in list_ingredients
|
| 151 |
+
]
|
| 152 |
+
strings = " ".join(stemmed_list)
|
| 153 |
return strings
|
| 154 |
|
| 155 |
|
| 156 |
def french_stemmer(strings):
|
| 157 |
+
list_ingredients = strings.split(",")
|
| 158 |
+
stemmed_list = [
|
| 159 |
+
stem_sentence(ingredient, fr_stemmer) for ingredient in list_ingredients
|
| 160 |
+
]
|
| 161 |
+
strings = " ".join(stemmed_list)
|
| 162 |
return strings
|
| 163 |
|
| 164 |
|
| 165 |
+
def cosine_similarity_func(expr1, expr2):
|
| 166 |
|
| 167 |
vectorizer = CountVectorizer()
|
| 168 |
vectors = vectorizer.fit_transform([expr1, expr2])
|
|
|
|
| 170 |
|
| 171 |
return similarity[0][0]
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
def add_text_similarity(
|
| 175 |
+
df: pd.DataFrame,
|
| 176 |
+
data_cleaning_func: Callable = data_cleaning_func,
|
| 177 |
+
remove_stop_words_func: Callable = remove_stop_words_func,
|
| 178 |
+
standardization_func: Callable = standardization_func,
|
| 179 |
+
cosine_similarity_func: Callable = cosine_similarity_func,
|
| 180 |
+
) -> pd.DataFrame:
|
| 181 |
+
"""
|
| 182 |
+
Add text similarity measures to the DataFrame based on item descriptions.
|
| 183 |
+
|
| 184 |
+
Args:
|
| 185 |
+
df (pd.DataFrame): Input DataFrame containing item descriptions.
|
| 186 |
+
data_cleaning_func (Callable): Function to clean the text data.
|
| 187 |
+
remove_stop_words_func (Callable): Function to remove stop words.
|
| 188 |
+
standardization_func (Callable): Function to standardize text.
|
| 189 |
+
cosine_similarity_func (Callable): Function to calculate cosine similarity.
|
| 190 |
+
|
| 191 |
+
Returns:
|
| 192 |
+
pd.DataFrame: DataFrame with added text similarity measures.
|
| 193 |
+
"""
|
| 194 |
+
# Clean item descriptions
|
| 195 |
+
df["ITEM_DESC_before_clean"] = df["ITEM_DESC_main"].apply(
|
| 196 |
+
data_cleaning_func
|
| 197 |
+
)
|
| 198 |
+
df["ITEM_DESC_after_clean"] = df["ITEM_DESC_update"].apply(
|
| 199 |
+
data_cleaning_func
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
+
# Remove stop words (French and English)
|
| 203 |
+
for language in ["french", "english"]:
|
| 204 |
+
stop_words = set(stopwords.words(language))
|
| 205 |
+
for col in ["ITEM_DESC_before_clean", "ITEM_DESC_after_clean"]:
|
| 206 |
+
df[col] = df[col].apply(
|
| 207 |
+
lambda x: " ".join(
|
| 208 |
+
word for word in x.split() if word.lower() not in stop_words
|
| 209 |
+
)
|
| 210 |
+
)
|
| 211 |
+
|
| 212 |
+
# Apply custom stop words removal
|
| 213 |
+
for col in ["ITEM_DESC_before_clean", "ITEM_DESC_after_clean"]:
|
| 214 |
+
df[col] = df[col].apply(remove_stop_words_func)
|
| 215 |
+
|
| 216 |
+
# Standardize text
|
| 217 |
+
for col in ["ITEM_DESC_before_clean", "ITEM_DESC_after_clean"]:
|
| 218 |
+
df[col] = df[col].apply(standardization_func)
|
| 219 |
+
|
| 220 |
+
# Calculate cosine similarity
|
| 221 |
+
df["Cosine_Similarity"] = df.apply(
|
| 222 |
+
lambda row: cosine_similarity_func(
|
| 223 |
+
row["ITEM_DESC_after_clean"], row["ITEM_DESC_before_clean"]
|
| 224 |
+
),
|
| 225 |
+
axis=1,
|
| 226 |
+
)
|
| 227 |
|
| 228 |
+
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
|
| 231 |
+
def display_data_with_download_button(
|
| 232 |
+
df,
|
| 233 |
+
title="Data without decision-making"
|
| 234 |
+
) -> None:
|
| 235 |
+
if df.empty:
|
| 236 |
st.write("No result for the above criterion ")
|
| 237 |
+
else:
|
| 238 |
st.subheader(title)
|
| 239 |
df.loc[:, "Evaluation"] = True
|
| 240 |
edited_df = st.data_editor(df)
|
| 241 |
csv_data = convert_df(edited_df)
|
| 242 |
+
try:
|
| 243 |
+
st.download_button(
|
| 244 |
+
label="Download data as CSV",
|
| 245 |
+
data=csv_data,
|
| 246 |
+
file_name=f"{title}.csv",
|
| 247 |
+
mime="text/csv",
|
| 248 |
+
key=title,
|
| 249 |
+
)
|
| 250 |
+
except:
|
| 251 |
+
st.download_button(
|
| 252 |
+
label="Download data as CSV",
|
| 253 |
+
data=csv_data,
|
| 254 |
+
file_name=f"{title}.csv",
|
| 255 |
+
mime="text/csv",
|
| 256 |
+
key=title + "1",
|
| 257 |
+
)
|
App/utils/filter_dataframe.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import pandas as pd
|
|
|
|
| 2 |
import streamlit as st
|
| 3 |
from pandas.api.types import (
|
| 4 |
is_categorical_dtype,
|
|
@@ -8,8 +9,7 @@ from pandas.api.types import (
|
|
| 8 |
)
|
| 9 |
|
| 10 |
|
| 11 |
-
|
| 12 |
-
def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
| 13 |
"""
|
| 14 |
Adds a UI on top of a dataframe to let viewers filter columns
|
| 15 |
|
|
@@ -19,7 +19,10 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 19 |
Returns:
|
| 20 |
pd.DataFrame: Filtered dataframe
|
| 21 |
"""
|
| 22 |
-
modify = st.checkbox(
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
if not modify:
|
| 25 |
return df
|
|
@@ -30,7 +33,7 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 30 |
for col in df.columns:
|
| 31 |
if is_object_dtype(df[col]):
|
| 32 |
try:
|
| 33 |
-
df[col] = pd.to_datetime(df[col])
|
| 34 |
except Exception:
|
| 35 |
pass
|
| 36 |
|
|
@@ -40,7 +43,11 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 40 |
modification_container = st.container()
|
| 41 |
|
| 42 |
with modification_container:
|
| 43 |
-
to_filter_columns = st.multiselect(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
for column in to_filter_columns:
|
| 45 |
left, right = st.columns((1, 20))
|
| 46 |
left.write("↳")
|
|
@@ -73,7 +80,12 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 73 |
),
|
| 74 |
)
|
| 75 |
if len(user_date_input) == 2:
|
| 76 |
-
user_date_input = tuple(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
start_date, end_date = user_date_input
|
| 78 |
df = df.loc[df[column].between(start_date, end_date)]
|
| 79 |
else:
|
|
@@ -81,6 +93,6 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 81 |
f"Substring or regex in {column}",
|
| 82 |
)
|
| 83 |
if user_text_input:
|
| 84 |
-
df = df[df[column].str.contains(user_text_input)]
|
| 85 |
|
| 86 |
return df
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
import streamlit as st
|
| 4 |
from pandas.api.types import (
|
| 5 |
is_categorical_dtype,
|
|
|
|
| 9 |
)
|
| 10 |
|
| 11 |
|
| 12 |
+
def filter_dataframe(df: pd.DataFrame, key: str = "filter_dataframe_on") -> pd.DataFrame:
|
|
|
|
| 13 |
"""
|
| 14 |
Adds a UI on top of a dataframe to let viewers filter columns
|
| 15 |
|
|
|
|
| 19 |
Returns:
|
| 20 |
pd.DataFrame: Filtered dataframe
|
| 21 |
"""
|
| 22 |
+
modify = st.checkbox(
|
| 23 |
+
"Add filters",
|
| 24 |
+
key=key + "checkbox"
|
| 25 |
+
)
|
| 26 |
|
| 27 |
if not modify:
|
| 28 |
return df
|
|
|
|
| 33 |
for col in df.columns:
|
| 34 |
if is_object_dtype(df[col]):
|
| 35 |
try:
|
| 36 |
+
df[col] = pd.to_datetime(df[col], format='%Y-%m-%d %H:%M:%S')
|
| 37 |
except Exception:
|
| 38 |
pass
|
| 39 |
|
|
|
|
| 43 |
modification_container = st.container()
|
| 44 |
|
| 45 |
with modification_container:
|
| 46 |
+
to_filter_columns = st.multiselect(
|
| 47 |
+
"Filter dataframe on",
|
| 48 |
+
df.columns,
|
| 49 |
+
key=key + "multiselect"
|
| 50 |
+
)
|
| 51 |
for column in to_filter_columns:
|
| 52 |
left, right = st.columns((1, 20))
|
| 53 |
left.write("↳")
|
|
|
|
| 80 |
),
|
| 81 |
)
|
| 82 |
if len(user_date_input) == 2:
|
| 83 |
+
user_date_input = tuple(
|
| 84 |
+
map(
|
| 85 |
+
pd.to_datetime,
|
| 86 |
+
user_date_input
|
| 87 |
+
)
|
| 88 |
+
)
|
| 89 |
start_date, end_date = user_date_input
|
| 90 |
df = df.loc[df[column].between(start_date, end_date)]
|
| 91 |
else:
|
|
|
|
| 93 |
f"Substring or regex in {column}",
|
| 94 |
)
|
| 95 |
if user_text_input:
|
| 96 |
+
df = df[df[column].astype(str).str.contains(user_text_input, case=False, na=False)]
|
| 97 |
|
| 98 |
return df
|
App/utils/priorite_pays.py
CHANGED
|
@@ -1,10 +1,54 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
dico = {}
|
| 9 |
for i in pays_all:
|
| 10 |
-
|
|
|
|
| 1 |
+
pays_all = [
|
| 2 |
+
"FRA",
|
| 3 |
+
"BEL",
|
| 4 |
+
"ESP",
|
| 5 |
+
"ITA",
|
| 6 |
+
"BRA",
|
| 7 |
+
"ATA",
|
| 8 |
+
"ARG",
|
| 9 |
+
"POL",
|
| 10 |
+
"ROU",
|
| 11 |
+
"BIG",
|
| 12 |
+
"SAM",
|
| 13 |
+
"UAE",
|
| 14 |
+
"SAU",
|
| 15 |
+
"KWT",
|
| 16 |
+
"OMN",
|
| 17 |
+
"BHR",
|
| 18 |
+
"QAT",
|
| 19 |
+
"JOR",
|
| 20 |
+
"EGY",
|
| 21 |
+
"ARM",
|
| 22 |
+
"UZB",
|
| 23 |
+
"IRN",
|
| 24 |
+
"KEN",
|
| 25 |
+
"GEO",
|
| 26 |
+
"LEB",
|
| 27 |
+
"UGA",
|
| 28 |
+
"PAK",
|
| 29 |
+
"IRQ",
|
| 30 |
+
"MTQ",
|
| 31 |
+
"GLP",
|
| 32 |
+
"REU",
|
| 33 |
+
"GUA",
|
| 34 |
+
"MTS",
|
| 35 |
+
"GLS",
|
| 36 |
+
"GUF",
|
| 37 |
+
"MTA",
|
| 38 |
+
"GLA",
|
| 39 |
+
"GUS",
|
| 40 |
+
"SXM",
|
| 41 |
+
"DOM",
|
| 42 |
+
"MAR",
|
| 43 |
+
"AMA",
|
| 44 |
+
"TUN",
|
| 45 |
+
"DZA",
|
| 46 |
+
"TUR",
|
| 47 |
+
"IAP",
|
| 48 |
+
"IET",
|
| 49 |
+
"TWN",
|
| 50 |
+
]
|
| 51 |
|
| 52 |
dico = {}
|
| 53 |
for i in pays_all:
|
| 54 |
+
dico[i] = len(pays_all) - pays_all.index(i)
|
App/utils/standadisation.py
CHANGED
|
@@ -1,17 +1,125 @@
|
|
| 1 |
-
dictionnaire = {
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
liste_stopword = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dictionnaire = {
|
| 2 |
+
"rg": "rouge",
|
| 3 |
+
"rges": "rouge",
|
| 4 |
+
"rge": "rouge",
|
| 5 |
+
"rse": "rose",
|
| 6 |
+
"rs": "rose",
|
| 7 |
+
"bl": "blanc",
|
| 8 |
+
"bdx": "Bordeaux",
|
| 9 |
+
"vdt": "vin de table",
|
| 10 |
+
"vdp": "vin de pays",
|
| 11 |
+
"blc": "blanc",
|
| 12 |
+
"bib": "bag in box",
|
| 13 |
+
"citr": "citron",
|
| 14 |
+
"co": "coco",
|
| 15 |
+
"gourm": "gourmand",
|
| 16 |
+
"patis": "patisserie",
|
| 17 |
+
"p'tits": "petit",
|
| 18 |
+
"p'tit": "petit",
|
| 19 |
+
"p tit": "petit",
|
| 20 |
+
"pt": "pepite",
|
| 21 |
+
"rev": "revil",
|
| 22 |
+
"succ": "sucettes",
|
| 23 |
+
"succet": "sucettes",
|
| 24 |
+
"chocohouse": "choco house",
|
| 25 |
+
"sach": "sachet",
|
| 26 |
+
"tab": "tablette",
|
| 27 |
+
"hte": "haute",
|
| 28 |
+
"spagh": "spaghetti",
|
| 29 |
+
"scht": "sachet",
|
| 30 |
+
"nr": "noir",
|
| 31 |
+
"caf": "cafe",
|
| 32 |
+
"barr": "barre",
|
| 33 |
+
"pces": "pieces",
|
| 34 |
+
"pc": "pieces",
|
| 35 |
+
"acidu": "acidule",
|
| 36 |
+
"blnc": "blanc",
|
| 37 |
+
"frui": "fruit",
|
| 38 |
+
"gourman": "gourmand",
|
| 39 |
+
"bte": "boîte",
|
| 40 |
+
"bt": "boîte",
|
| 41 |
+
"ptit": "petit",
|
| 42 |
+
"corb": "corbeil",
|
| 43 |
+
"ptits": "petit",
|
| 44 |
+
"pti": "petit",
|
| 45 |
+
"nois": "noisette",
|
| 46 |
+
"poul": "poulain",
|
| 47 |
+
"barq": "barquette",
|
| 48 |
+
"barqu": "barquette",
|
| 49 |
+
"fizz": "fizzy",
|
| 50 |
+
"st": "saint",
|
| 51 |
+
"mich": "michel",
|
| 52 |
+
"cal": "calendrier",
|
| 53 |
+
"calend": "calendrier",
|
| 54 |
+
"calendr": "calendrier",
|
| 55 |
+
"caram": "caramel",
|
| 56 |
+
"cava": "cavalier",
|
| 57 |
+
"har": "haribo",
|
| 58 |
+
"choc": "chocolat",
|
| 59 |
+
"choco": "chocolat",
|
| 60 |
+
"lt": "lait",
|
| 61 |
+
"choc'n": "chocolat noir",
|
| 62 |
+
"choc n": "chocolat noir",
|
| 63 |
+
"degust": "degustation",
|
| 64 |
+
"degus": "degustation",
|
| 65 |
+
"bis": "biscuit",
|
| 66 |
+
"coffr": "coffret",
|
| 67 |
+
"coff": "coffret",
|
| 68 |
+
"conf": "confiserie",
|
| 69 |
+
"confis": "confiserie",
|
| 70 |
+
"croco": "crocodile",
|
| 71 |
+
"dble": "double",
|
| 72 |
+
"dess": "dessert",
|
| 73 |
+
"doyp": "doypack",
|
| 74 |
+
"harib": "harib",
|
| 75 |
+
"et": "etui",
|
| 76 |
+
"exc": "excellence",
|
| 77 |
+
"excel": "excellence",
|
| 78 |
+
"frit": "friture",
|
| 79 |
+
"fritu": "friture",
|
| 80 |
+
"fritur": "friture",
|
| 81 |
+
"gd": "grand",
|
| 82 |
+
"gr": "grand",
|
| 83 |
+
"grd": "grand",
|
| 84 |
+
"grchoc": "grand chocolat",
|
| 85 |
+
"lat": "lait",
|
| 86 |
+
"ass": "assorti",
|
| 87 |
+
"assoti": "assorti",
|
| 88 |
+
"noug": "nougatine",
|
| 89 |
+
"nougat": "nougatine",
|
| 90 |
+
"scht": "sachet",
|
| 91 |
+
"sct": "secret",
|
| 92 |
+
"cho": "chocolat",
|
| 93 |
+
"bisc": "biscuit",
|
| 94 |
+
"am": "amande",
|
| 95 |
+
"liq": "liqueur",
|
| 96 |
+
"tabl": "tablette",
|
| 97 |
+
"asst": "assorti",
|
| 98 |
+
"tab": "tablette",
|
| 99 |
+
"bil": "bille",
|
| 100 |
+
"vali": "valisette",
|
| 101 |
+
"cda": "chevaliers d argouges",
|
| 102 |
+
"tub": "tubo",
|
| 103 |
+
"gril": "grille",
|
| 104 |
+
"amandesgrilles": "amandes grilles",
|
| 105 |
+
"ball": "ballotin",
|
| 106 |
+
"piecestubo": "pieces tubo",
|
| 107 |
+
}
|
| 108 |
|
| 109 |
+
liste_stopword = [
|
| 110 |
+
"oz",
|
| 111 |
+
"kg",
|
| 112 |
+
"g",
|
| 113 |
+
"lb",
|
| 114 |
+
"mg",
|
| 115 |
+
"l",
|
| 116 |
+
"cl",
|
| 117 |
+
"ml",
|
| 118 |
+
"tsp",
|
| 119 |
+
"tbsp",
|
| 120 |
+
"cm",
|
| 121 |
+
"x",
|
| 122 |
+
"cte",
|
| 123 |
+
"h",
|
| 124 |
+
"unknown",
|
| 125 |
+
]
|
app.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
import requests
|
| 3 |
|
| 4 |
# Configuration
|
| 5 |
st.set_page_config(
|
| 6 |
page_title="Recherche",
|
| 7 |
-
page_icon="images/logo.png",
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
)
|
| 12 |
change_footer_style = """
|
| 13 |
<style>
|
|
@@ -27,7 +26,6 @@ def get_product_info(EAN):
|
|
| 27 |
return {"error": "Product not found"}
|
| 28 |
|
| 29 |
|
| 30 |
-
|
| 31 |
""" Bienvenue sur notre site de web scraping dédié à la recherche d’informations sur les produits disponibles sur Open Food Facts! 🎉
|
| 32 |
|
| 33 |
Ici, vous pouvez rechercher des informations détaillées sur une multitude de produits simplement en utilisant leur code EAN. Nous nous efforçons de fournir des informations précises et à jour pour vous aider à prendre des décisions éclairées sur les produits que vous consommez.
|
|
@@ -35,7 +33,7 @@ Ici, vous pouvez rechercher des informations détaillées sur une multitude de p
|
|
| 35 |
Profitez de votre exploration! 🕵️♀️
|
| 36 |
"""
|
| 37 |
# Test de la fonction
|
| 38 |
-
EAN =st.text_input("EAN",
|
| 39 |
-
if EAN
|
| 40 |
product_info = get_product_info(EAN)
|
| 41 |
-
st.json(product_info)
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
import requests
|
| 3 |
|
| 4 |
# Configuration
|
| 5 |
st.set_page_config(
|
| 6 |
page_title="Recherche",
|
| 7 |
+
page_icon="images/logo.png",
|
| 8 |
+
layout="wide",
|
| 9 |
+
initial_sidebar_state="auto",
|
|
|
|
| 10 |
)
|
| 11 |
change_footer_style = """
|
| 12 |
<style>
|
|
|
|
| 26 |
return {"error": "Product not found"}
|
| 27 |
|
| 28 |
|
|
|
|
| 29 |
""" Bienvenue sur notre site de web scraping dédié à la recherche d’informations sur les produits disponibles sur Open Food Facts! 🎉
|
| 30 |
|
| 31 |
Ici, vous pouvez rechercher des informations détaillées sur une multitude de produits simplement en utilisant leur code EAN. Nous nous efforçons de fournir des informations précises et à jour pour vous aider à prendre des décisions éclairées sur les produits que vous consommez.
|
|
|
|
| 33 |
Profitez de votre exploration! 🕵️♀️
|
| 34 |
"""
|
| 35 |
# Test de la fonction
|
| 36 |
+
EAN = st.text_input("EAN", "0737628064502") # remplacer par l'EAN du produit
|
| 37 |
+
if EAN:
|
| 38 |
product_info = get_product_info(EAN)
|
| 39 |
+
st.json(product_info)
|
pages/🤖_Gestion_de_rupture_famille.py
CHANGED
|
@@ -1,170 +1,262 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
|
| 3 |
-
import
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
-
# Page configuration
|
| 12 |
def config_page():
|
| 13 |
st.set_page_config(
|
| 14 |
page_title="Gestion des ruptures",
|
| 15 |
page_icon="images/Carrefour_logo.png",
|
| 16 |
-
layout="wide"
|
| 17 |
)
|
| 18 |
hide_streamlit_style = """
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
| 24 |
|
| 25 |
|
| 26 |
-
def
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
with col1 :
|
| 53 |
-
min_product_id = input_box.valid_produict_id()
|
| 54 |
-
|
| 55 |
-
with col2 :
|
| 56 |
-
vaind_class_id = input_box.valid_class_id()
|
| 57 |
-
|
| 58 |
-
columns1, columns2, columns3 = st.columns(3)
|
| 59 |
-
|
| 60 |
-
with columns1:
|
| 61 |
-
nb_countries = input_box.get_number_countries()
|
| 62 |
-
|
| 63 |
-
with columns2 :
|
| 64 |
-
proportion = input_box.get_proportion()
|
| 65 |
-
|
| 66 |
-
with columns3 :
|
| 67 |
-
show_proportion = input_box.show_proportion()
|
| 68 |
-
|
| 69 |
-
# excution
|
| 70 |
-
if st.button("RUN ", key="run_button"):
|
| 71 |
-
data = data_with_valide_key(data, product_id, class_id, min_product_id, vaind_class_id )
|
| 72 |
-
Country, merged = nouvelle_data(data,
|
| 73 |
-
str(product_id),
|
| 74 |
-
str(class_id))
|
| 75 |
-
|
| 76 |
-
data_with_pro = finale_merged(merged,
|
| 77 |
-
Country,
|
| 78 |
-
product_id,
|
| 79 |
-
class_id)
|
| 80 |
-
|
| 81 |
-
if show_proportion :
|
| 82 |
-
display_data_with_download_button(data_with_pro, title="Show data with ratios")
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
"""## The data below is filtered as follows: """
|
| 86 |
-
"- Number of countries greater than or equal to ", nb_countries
|
| 87 |
-
"- The proportion with the highest ", class_id ," is greater than or equal to ",proportion
|
| 88 |
-
|
| 89 |
-
data_countries_ratio = cond_pays_proportion(data_with_pro,
|
| 90 |
-
nb_countries,
|
| 91 |
-
proportion,
|
| 92 |
-
product_id)
|
| 93 |
-
|
| 94 |
-
if data_countries_ratio.empty :
|
| 95 |
-
st.write("No result for the above criterion ")
|
| 96 |
-
|
| 97 |
-
else :
|
| 98 |
-
df = supprime_country(data_countries_ratio)
|
| 99 |
-
max_number_index = df.groupby(product_id)['nombre'].idxmax()
|
| 100 |
-
df_max_number = df.loc[max_number_index]
|
| 101 |
-
df_max_number.drop(["Countries"], axis = 1, inplace =True)
|
| 102 |
-
|
| 103 |
-
finale_df = Merger(data,
|
| 104 |
-
df_max_number,
|
| 105 |
-
product_id,
|
| 106 |
-
class_id)
|
| 107 |
|
| 108 |
-
tab1, tab2 = st.tabs(["Data without decision-making", "Data with proposed changes"])
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
-
|
| 114 |
-
display_data_with_download_button(finale_df, title="Data with proposed changes")
|
| 115 |
|
| 116 |
-
"## Country priority "
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
|
|
|
| 123 |
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
-
finale_df_1 = ajout_simularite(Merger(data,df_max_poids1, product_id, class_id))
|
| 148 |
-
display_data_with_download_button(finale_df_1, title=" One vs One with similarity score")
|
| 149 |
-
st.success('Done!', icon="✅")
|
| 150 |
-
st.balloons()
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
|
| 158 |
if __name__ == "__main__":
|
| 159 |
-
lien_label = "Example of input"
|
| 160 |
-
lien_url = "https://docs.google.com/spreadsheets/d/123hVTOFpBT-C6mCnrOBh8fFIhSi8FxiuyHZJAQu8bDc/edit#gid=1220891905"
|
| 161 |
-
lien_html = f'<a href="{lien_url}">{lien_label}</a>'
|
| 162 |
-
|
| 163 |
-
lien_label_ = "Documentation utilisateur"
|
| 164 |
-
lien_url_ = "https://docs.google.com/document/d/1WQwr5D87ZHSlBRWQw7KMbBhbEdFS4dlhltFDgZBNP4U/edit?usp=sharing"
|
| 165 |
-
lien_html_ = f'<a href="{lien_url_}">{lien_label_}</a>'
|
| 166 |
-
|
| 167 |
config_page()
|
| 168 |
-
st.sidebar.markdown(
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
app()
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from App.class_input_box.input_box import InputsBox
|
| 3 |
+
from App.functions_rupture.functions_gestion import (
|
| 4 |
+
filter_data_with_valid_keys,
|
| 5 |
+
process_new_data,
|
| 6 |
+
finalize_merged_data,
|
| 7 |
+
filter_by_country_and_proportion,
|
| 8 |
+
process_country_priority,
|
| 9 |
+
)
|
| 10 |
+
from App.utils.divers_function import (
|
| 11 |
+
display_data_with_download_button,
|
| 12 |
+
supprime_country,
|
| 13 |
+
merge_and_update_classification,
|
| 14 |
+
add_text_similarity
|
| 15 |
+
)
|
| 16 |
+
from App.utils.filter_dataframe import filter_dataframe
|
| 17 |
+
import logging
|
| 18 |
+
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 19 |
|
| 20 |
|
|
|
|
| 21 |
def config_page():
|
| 22 |
st.set_page_config(
|
| 23 |
page_title="Gestion des ruptures",
|
| 24 |
page_icon="images/Carrefour_logo.png",
|
| 25 |
+
layout="wide",
|
| 26 |
)
|
| 27 |
hide_streamlit_style = """
|
| 28 |
+
<style>
|
| 29 |
+
footer {visibility: hidden;}
|
| 30 |
+
</style>
|
| 31 |
+
"""
|
| 32 |
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
| 33 |
|
| 34 |
|
| 35 |
+
def display_filters(input_box):
|
| 36 |
+
col1, col2 = st.columns(2)
|
| 37 |
+
with col1:
|
| 38 |
+
product_id = input_box.get_product_id()
|
| 39 |
+
min_product_id = input_box.valid_produict_id()
|
| 40 |
+
with col2:
|
| 41 |
+
class_id = input_box.get_class_id()
|
| 42 |
+
valid_class_id = input_box.valid_class_id()
|
| 43 |
+
|
| 44 |
+
col1, col2, col3 = st.columns(3)
|
| 45 |
+
with col1:
|
| 46 |
+
nb_countries = input_box.get_number_countries()
|
| 47 |
+
with col2:
|
| 48 |
+
proportion = input_box.get_proportion()
|
| 49 |
+
with col3:
|
| 50 |
+
show_proportion = input_box.show_proportion()
|
| 51 |
+
|
| 52 |
+
return (
|
| 53 |
+
product_id,
|
| 54 |
+
class_id,
|
| 55 |
+
min_product_id,
|
| 56 |
+
valid_class_id,
|
| 57 |
+
nb_countries,
|
| 58 |
+
proportion,
|
| 59 |
+
show_proportion,
|
| 60 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
|
|
|
| 62 |
|
| 63 |
+
def process_data(
|
| 64 |
+
data,
|
| 65 |
+
product_id,
|
| 66 |
+
class_id,
|
| 67 |
+
min_product_id,
|
| 68 |
+
valid_class_id,
|
| 69 |
+
nb_countries,
|
| 70 |
+
proportion,
|
| 71 |
+
show_proportion,
|
| 72 |
+
):
|
| 73 |
+
logging.debug(f"Starting process_data with product_id: {product_id}, class_id: {class_id}")
|
| 74 |
+
data = filter_data_with_valid_keys(
|
| 75 |
+
data, product_id, class_id, min_product_id, valid_class_id
|
| 76 |
+
)
|
| 77 |
+
# st.dataframe(data)
|
| 78 |
+
logging.debug("Data filtered with valid keys")
|
| 79 |
+
Country, merged = process_new_data(data, str(product_id), class_id)
|
| 80 |
+
logging.debug("New data processed")
|
| 81 |
+
data_with_pro = finalize_merged_data(merged, Country, product_id, class_id)
|
| 82 |
+
# st.dataframe(data_with_pro)
|
| 83 |
+
logging.debug("Merged data finalized")
|
| 84 |
+
if show_proportion:
|
| 85 |
+
logging.info("ibra 1 ")
|
| 86 |
+
display_data_with_download_button(data_with_pro, title="Show data with ratios")
|
| 87 |
+
logging.info("ibra 2")
|
| 88 |
+
|
| 89 |
+
st.write("## The data below is filtered as follows:")
|
| 90 |
+
st.write(f"- Number of countries greater than or equal to {nb_countries}")
|
| 91 |
+
st.write(
|
| 92 |
+
f"- The proportion with the highest {class_id} is greater than or equal to {proportion}"
|
| 93 |
+
)
|
| 94 |
+
data_countries_ratio = filter_by_country_and_proportion(
|
| 95 |
+
data_with_pro, nb_countries, proportion, product_id
|
| 96 |
+
)
|
| 97 |
+
if data_countries_ratio.empty:
|
| 98 |
+
st.write("No result for the above criterion")
|
| 99 |
+
else:
|
| 100 |
+
display_filtered_data(data, data_countries_ratio, product_id, class_id)
|
| 101 |
|
| 102 |
+
display_country_priority(data_with_pro, data, product_id, class_id)
|
|
|
|
| 103 |
|
|
|
|
| 104 |
|
| 105 |
+
def display_filtered_data(data, data_countries_ratio, product_id, class_id):
|
| 106 |
+
df = supprime_country(data_countries_ratio)
|
| 107 |
+
max_number_index = df.groupby(product_id)["count"].idxmax()
|
| 108 |
+
df_max_number = df.loc[max_number_index]
|
| 109 |
+
df_max_number.drop(["Countries"], axis=1, inplace=True)
|
| 110 |
|
| 111 |
+
finale_df = merge_and_update_classification(
|
| 112 |
+
data, df_max_number, product_id, class_id
|
| 113 |
+
)
|
| 114 |
|
| 115 |
+
tab1, tab2 = st.tabs(
|
| 116 |
+
["Data without decision-making", "Data with proposed changes"]
|
| 117 |
+
)
|
| 118 |
+
with tab1:
|
| 119 |
+
display_data_with_download_button(
|
| 120 |
+
df,
|
| 121 |
+
title="Data without decision-making"
|
| 122 |
+
)
|
| 123 |
+
with tab2:
|
| 124 |
+
display_data_with_download_button(
|
| 125 |
+
finale_df,
|
| 126 |
+
title="Data with proposed changes"
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def display_country_priority(data_with_pro, data, product_id, class_id):
    """Render the 'Country priority' section as four tabs plus a 1-vs-1 view."""
    st.write("## Country priority")
    priority_data, df_equa, df_nequa = process_country_priority(
        data_with_pro, product_id
    )

    # Products whose classification is seen in more than two countries.
    df_nequa_ = df_nequa[df_nequa.total_by_product.apply(lambda x: int(x) > 2)]

    tabs = st.tabs(
        [
            "Data without decision-making",
            "Equality case and more than 1",
            "Cases of inequality",
            "Data with proposed changes more than 2",
        ]
    )
    with tabs[0]:
        display_data_with_download_button(
            priority_data, title="Data without decision-making"
        )
    with tabs[1]:
        display_data_with_download_button(df_equa, title="Equality case")
    with tabs[2]:
        display_data_with_download_button(
            df_nequa_,
            title="Cases of inequality",
        )
    with tabs[3]:
        display_proposed_changes(df_nequa_, data, product_id, class_id)

    display_one_vs_one(df_nequa, data, product_id, class_id)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def display_proposed_changes(df_nequa_, data, product_id, class_id):
    """Propose, per product, the classification carried by the highest Weight.

    Applies to products present in more than two countries; the winning row
    is merged back into the raw data and shown with a download button.
    """
    # Row index of the maximum Weight for each product.
    max_poids_index = df_nequa_.groupby(product_id)["Weight"].idxmax()
    # `.drop()` returns a new frame: avoids mutating the `.loc` slice in
    # place (pandas SettingWithCopyWarning on a filtered frame).
    df_max_poids = df_nequa_.loc[max_poids_index].drop(columns=["COUNTRY_KEY"])
    finale_df_ = merge_and_update_classification(
        data, df_max_poids, product_id, class_id
    )
    display_data_with_download_button(
        finale_df_, title="Data with proposed changes more than 2"
    )
|
| 176 |
|
| 177 |
|
| 178 |
+
def display_one_vs_one(df_nequa, data, product_id, class_id):
    """Handle products present in exactly two countries.

    Keeps the classification with the higher Weight and attaches a
    text-similarity score so the conflict can be reviewed manually.
    """
    # One-vs-one conflicts: product appears in exactly two countries.
    df_nequa_1 = df_nequa[df_nequa.total_by_product.apply(lambda x: int(x) == 2)]
    max_poids_index1 = df_nequa_1.groupby(product_id)["Weight"].idxmax()
    # `.drop()` returns a new frame instead of mutating the `.loc` slice
    # in place (avoids pandas' SettingWithCopyWarning).
    df_max_poids1 = df_nequa_1.loc[max_poids_index1].drop(columns=["COUNTRY_KEY"])

    finale_df_1 = add_text_similarity(
        merge_and_update_classification(data, df_max_poids1, product_id, class_id)
    )
    display_data_with_download_button(
        finale_df_1, title=" One vs One with similarity score"
    )
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
+
def app():
    """Entry point for the 'Gestion des ruptures famille' page.

    Loads the uploaded file, shows it with interactive filters, collects
    the processing parameters, and runs the pipeline on RUN.
    """
    logging.info("Starting app function")
    st.title("Gestion des ruptures famille")
    input_box = InputsBox()
    data = input_box.get_data()
    # Lazy %-style args: formatted only if the DEBUG level is enabled.
    logging.debug("Data retrieved: %s", data is not None)

    if data is not None and data.shape[0] != 0:
        logging.info("Data is valid, proceeding with processing")
        st.header("Data")
        st.dataframe(filter_dataframe(data))

        st.header("Parameters")
        (
            product_id,
            class_id,
            min_product_id,
            valid_class_id,
            nb_countries,
            proportion,
            show_proportion,
        ) = display_filters(input_box)

        # Optional extra restriction coming from the holding-level filter
        # widget; an empty selection means "keep everything".
        list_product_selected = (
            filter_dataframe(data, "data_filter_by_holding")[product_id]
            .unique()
            .tolist()
        )
        if list_product_selected:  # truthy iff at least one product picked
            data_selected = data[data[product_id].isin(list_product_selected)]
        else:
            # Typo fixed ("No addictionnal filter selecting") and wording
            # aligned with the sous-famille page.
            st.warning("No additional filter selected")
            data_selected = data.copy()

        if st.button("RUN", key="run_button"):
            try:
                process_data(
                    data_selected,
                    product_id,
                    class_id,
                    min_product_id,
                    valid_class_id,
                    nb_countries,
                    proportion,
                    show_proportion,
                )
                st.success("Done!", icon="✅")
                st.balloons()
            except Exception as e:
                # Surface pipeline failures to the user instead of crashing.
                st.error(f"An error occurred: {str(e)}", icon="🚨")
    else:
        logging.warning("Data is None or empty")
        st.info(
            """Ensure that column names are capitalized and that product_id
            and class_id descriptions are present, as well as a country
            column.""",
            icon="ℹ️",
        )
    logging.info("App function completed")
|
| 250 |
|
| 251 |
|
| 252 |
if __name__ == "__main__":
    # Configure the page first (must precede any other st.* call), expose
    # the reference links in the sidebar, then run the app.
    config_page()
    sidebar_links = [
        (
            "Documentation utilisateur",
            "https://docs.google.com/document/d/1WQwr5D87ZHSlBRWQw7KMbBhbEdFS4dlhltFDgZBNP4U/edit?usp=sharing",
        ),
        (
            "Example of input",
            "https://docs.google.com/spreadsheets/d/123hVTOFpBT-C6mCnrOBh8fFIhSi8FxiuyHZJAQu8bDc/edit#gid=1220891905",
        ),
    ]
    for label, url in sidebar_links:
        st.sidebar.markdown(
            f'<a href="{url}">{label}</a>',
            unsafe_allow_html=True,
        )
    app()
|
pages/🦾_Gestion_de_rupture_sous_famille.py
CHANGED
|
@@ -1,172 +1,261 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
|
| 3 |
-
import
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
# Page configuration
|
| 12 |
-
st.set_page_config(
|
| 13 |
-
page_title="Gestion des ruptures",
|
| 14 |
-
page_icon="images/Carrefour_logo.png",
|
| 15 |
-
layout="wide"
|
| 16 |
)
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
def app():
|
| 26 |
-
st.title("Gestion des ruptures sous famille")
|
| 27 |
-
|
| 28 |
-
input_box = InputsBox()
|
| 29 |
-
|
| 30 |
-
data = input_box.get_data()
|
| 31 |
-
|
| 32 |
-
try:
|
| 33 |
-
if data.shape[0] != 0 :
|
| 34 |
-
st.header("Data")
|
| 35 |
-
|
| 36 |
-
st.dataframe(filter_dataframe(data))
|
| 37 |
-
|
| 38 |
-
"## Parameters"
|
| 39 |
-
|
| 40 |
-
col1, col2 = st.columns(2)
|
| 41 |
-
|
| 42 |
-
with col1 :
|
| 43 |
-
product_id = input_box.get_product_id()
|
| 44 |
-
|
| 45 |
-
with col2 :
|
| 46 |
-
class_id = input_box.get_class_id()
|
| 47 |
-
|
| 48 |
-
'## Filters'
|
| 49 |
-
col1, col2 = st.columns(2)
|
| 50 |
-
|
| 51 |
-
with col1 :
|
| 52 |
-
min_product_id = input_box.valid_produict_id()
|
| 53 |
-
|
| 54 |
-
with col2 :
|
| 55 |
-
vaind_class_id = input_box.valid_class_id()
|
| 56 |
-
|
| 57 |
-
columns1, columns2, columns3, columns4 = st.columns(4)
|
| 58 |
-
|
| 59 |
-
with columns1:
|
| 60 |
-
nb_countries = input_box.get_number_countries()
|
| 61 |
-
|
| 62 |
-
with columns2 :
|
| 63 |
-
proportion = input_box.get_proportion()
|
| 64 |
-
|
| 65 |
-
with columns3 :
|
| 66 |
-
countries = input_box.get_countries()
|
| 67 |
-
|
| 68 |
-
with columns4 :
|
| 69 |
-
show_proportion = input_box.show_proportion()
|
| 70 |
-
|
| 71 |
-
#execution
|
| 72 |
-
if st.button("RUN ", key="run_button"):
|
| 73 |
-
data = data_with_valide_key(data, product_id, class_id, min_product_id, vaind_class_id )
|
| 74 |
-
data = data[data.COUNTRY_KEY.isin(countries)]
|
| 75 |
-
Country, merged = nouvelle_data(data,
|
| 76 |
-
str(product_id),
|
| 77 |
-
str(class_id))
|
| 78 |
-
|
| 79 |
-
data_with_pro = finale_merged(merged,
|
| 80 |
-
Country,
|
| 81 |
-
product_id,
|
| 82 |
-
class_id)
|
| 83 |
-
|
| 84 |
-
if show_proportion :
|
| 85 |
-
display_data_with_download_button(data_with_pro, title="Show data with ratios")
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
"""## The data below is filtered as follows: """
|
| 89 |
-
"- Number of countries greater than or equal to ", nb_countries
|
| 90 |
-
"- The proportion with the highest ", class_id ," is greater than or equal to ",proportion
|
| 91 |
-
|
| 92 |
-
data_countries_ratio = cond_pays_proportion(data_with_pro,
|
| 93 |
-
nb_countries,
|
| 94 |
-
proportion,
|
| 95 |
-
product_id)
|
| 96 |
-
|
| 97 |
-
if data_countries_ratio.empty :
|
| 98 |
-
st.write("No result for the above criterion ")
|
| 99 |
-
|
| 100 |
-
else :
|
| 101 |
-
df = supprime_country(data_countries_ratio)
|
| 102 |
-
max_number_index = df.groupby(product_id)['nombre'].idxmax()
|
| 103 |
-
df_max_number = df.loc[max_number_index]
|
| 104 |
-
df_max_number.drop(["Countries"], axis = 1, inplace =True)
|
| 105 |
-
|
| 106 |
-
finale_df = Merger(data,
|
| 107 |
-
df_max_number,
|
| 108 |
-
product_id,
|
| 109 |
-
class_id)
|
| 110 |
-
|
| 111 |
-
tab1, tab2 = st.tabs(["Data without decision-making", "Data with proposed changes"])
|
| 112 |
-
|
| 113 |
-
with tab1 :
|
| 114 |
-
display_data_with_download_button(df, title="Data without decision-making")
|
| 115 |
-
|
| 116 |
-
with tab2 :
|
| 117 |
-
display_data_with_download_button(finale_df, title="Data with proposed changes")
|
| 118 |
-
|
| 119 |
-
"## Country priority "
|
| 120 |
-
|
| 121 |
-
priority_data, df_equa, df_nequa = cond_pays_priorite(data_with_pro, product_id)
|
| 122 |
-
tab1, tab2, tab3, tab4 = st.tabs(["Data without decision-making", "Equality case and mt1", "Cases of inequality", "Data with proposed changes mt2"])
|
| 123 |
-
|
| 124 |
-
with tab1 :
|
| 125 |
-
display_data_with_download_button(priority_data, title="Data without decision-making")
|
| 126 |
-
|
| 127 |
-
with tab2 :
|
| 128 |
-
display_data_with_download_button(df_equa, title="Equality case")
|
| 129 |
-
|
| 130 |
-
with tab3 :
|
| 131 |
-
df_nequa_ = df_nequa[(df_nequa.total_by_line.apply(lambda x: int(x) > 2))]
|
| 132 |
-
display_data_with_download_button(df_nequa_, title="Cases of inequality")
|
| 133 |
-
|
| 134 |
-
max_poids_index = df_nequa_.groupby(product_id)['Poids'].idxmax()
|
| 135 |
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
finale_df_ = Merger(data,df_max_poids, product_id, class_id)
|
| 140 |
-
with tab4 :
|
| 141 |
-
display_data_with_download_button(finale_df_, title="Data with proposed changes mt2")
|
| 142 |
|
| 143 |
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
finale_df_1 = ajout_simularite(Merger(data,df_max_poids1, product_id, class_id))
|
| 151 |
-
display_data_with_download_button(finale_df_1, title=" One vs One with similarity score")
|
| 152 |
-
st.success('Done!', icon="✅")
|
| 153 |
-
st.balloons()
|
| 154 |
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
|
| 161 |
if __name__ == "__main__":
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
st.sidebar.markdown(lien_html, unsafe_allow_html=True)
|
| 172 |
app()
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from App.class_input_box.input_box import InputsBox
|
| 3 |
+
from App.functions_rupture.functions_gestion import (
|
| 4 |
+
filter_data_with_valid_keys,
|
| 5 |
+
process_new_data,
|
| 6 |
+
finalize_merged_data,
|
| 7 |
+
filter_by_country_and_proportion,
|
| 8 |
+
process_country_priority,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
)
|
| 10 |
+
from App.utils.divers_function import (
|
| 11 |
+
display_data_with_download_button,
|
| 12 |
+
supprime_country,
|
| 13 |
+
merge_and_update_classification,
|
| 14 |
+
add_text_similarity,
|
| 15 |
+
)
|
| 16 |
+
from App.utils.filter_dataframe import filter_dataframe
|
| 17 |
+
import logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
# Configure root logging once at import time; DEBUG level surfaces the
# step-by-step trace messages emitted throughout this page.
logging.basicConfig(
    level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
)
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
+
def config_page():
    """Set the global Streamlit page configuration.

    Must run before any other st.* call on the page. Also hides
    Streamlit's default footer via injected CSS.
    """
    st.set_page_config(
        page_title="Gestion des ruptures",
        page_icon="images/Carrefour_logo.png",
        layout="wide",
    )
    # CSS snippet rendered with unsafe_allow_html to suppress the footer.
    hide_streamlit_style = """
    <style>
    footer {visibility: hidden;}
    </style>
    """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def display_filters(input_box):
    """Render the parameter widgets and collect their values.

    Returns the tuple (product_id, class_id, min_product_id,
    valid_class_id, nb_countries, proportion, countries, show_proportion).
    """
    left, right = st.columns(2)
    with left:
        product_id = input_box.get_product_id()
        min_product_id = input_box.valid_produict_id()
    with right:
        class_id = input_box.get_class_id()
        valid_class_id = input_box.valid_class_id()

    first, second, third = st.columns(3)
    with first:
        nb_countries = input_box.get_number_countries()
    with second:
        proportion = input_box.get_proportion()
    with third:
        show_proportion = input_box.show_proportion()

    countries = input_box.get_countries()

    return (
        product_id,
        class_id,
        min_product_id,
        valid_class_id,
        nb_countries,
        proportion,
        countries,
        show_proportion,
    )
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def process_data(
    data,
    product_id,
    class_id,
    min_product_id,
    valid_class_id,
    nb_countries,
    proportion,
    countries,
    show_proportion,
):
    """Run the sous-famille pipeline: filter, merge, then render results."""
    logging.debug(
        f"Starting process_data with product_id: {product_id}, class_id: {class_id}"
    )
    # Keep only rows whose keys pass validation, restricted to the chosen
    # countries.
    valid_rows = filter_data_with_valid_keys(
        data, product_id, class_id, min_product_id, valid_class_id
    )
    data = valid_rows[valid_rows.COUNTRY_KEY.isin(countries)]
    logging.debug("Data filtered with valid keys and countries")

    country_df, merged_df = process_new_data(data, str(product_id), str(class_id))
    logging.debug("New data processed")
    data_with_pro = finalize_merged_data(merged_df, country_df, product_id, class_id)
    logging.debug("Merged data finalized")

    if show_proportion:
        logging.info("Displaying data with ratios")
        display_data_with_download_button(data_with_pro, title="Show data with ratios")

    st.write("## The data below is filtered as follows:")
    st.write(f"- Number of countries greater than or equal to {nb_countries}")
    st.write(
        f"- The proportion with the highest {class_id} is greater than or equal to {proportion}"
    )

    data_countries_ratio = filter_by_country_and_proportion(
        data_with_pro, nb_countries, proportion, product_id
    )

    if data_countries_ratio.empty:
        st.write("No result for the above criterion")
    else:
        display_filtered_data(data, data_countries_ratio, product_id, class_id)

    display_country_priority(data_with_pro, data, product_id, class_id)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def display_filtered_data(data, data_countries_ratio, product_id, class_id):
    """Show the country/ratio-filtered rows and the proposed reclassification.

    For each product, keep the classification observed in the largest number
    of countries ("count" column), merge that choice back into the raw data,
    and render both views in two tabs with download buttons.
    """
    df = supprime_country(data_countries_ratio)
    # Index of the row with the highest country count per product.
    max_number_index = df.groupby(product_id)["count"].idxmax()
    # `.drop()` returns a new frame: avoids mutating the `.loc` slice in
    # place (pandas SettingWithCopyWarning on a filtered frame).
    df_max_number = df.loc[max_number_index].drop(columns=["Countries"])

    finale_df = merge_and_update_classification(
        data, df_max_number, product_id, class_id
    )

    tab1, tab2 = st.tabs(["Data without decision-making", "Data with proposed changes"])
    with tab1:
        display_data_with_download_button(df, title="Data without decision-making")
    with tab2:
        display_data_with_download_button(finale_df, title="Data with proposed changes")
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def display_country_priority(data_with_pro, data, product_id, class_id):
    """Render the 'Country priority' section as four tabs plus a 1-vs-1 view."""
    st.write("## Country priority")
    priority_data, df_equa, df_nequa = process_country_priority(
        data_with_pro, product_id
    )

    # Products whose classification is seen in more than two countries.
    df_nequa_ = df_nequa[df_nequa.total_by_product.apply(lambda x: int(x) > 2)]

    tab_no_decision, tab_equal, tab_unequal, tab_changes = st.tabs(
        [
            "Data without decision-making",
            "Equality case and more than 1",
            "Cases of inequality",
            "Data with proposed changes more than 2",
        ]
    )
    with tab_no_decision:
        display_data_with_download_button(
            priority_data, title="Data without decision-making"
        )
    with tab_equal:
        display_data_with_download_button(df_equa, title="Equality case")
    with tab_unequal:
        display_data_with_download_button(df_nequa_, title="Cases of inequality")
    with tab_changes:
        display_proposed_changes(df_nequa_, data, product_id, class_id)

    display_one_vs_one(df_nequa, data, product_id, class_id)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def display_proposed_changes(df_nequa_, data, product_id, class_id):
    """Propose, per product, the classification carried by the highest Weight.

    Applies to products present in more than two countries; the winning row
    is merged back into the raw data and shown with a download button.
    """
    # Row index of the maximum Weight for each product.
    max_poids_index = df_nequa_.groupby(product_id)["Weight"].idxmax()
    # `.drop()` returns a new frame: avoids mutating the `.loc` slice in
    # place (pandas SettingWithCopyWarning on a filtered frame).
    df_max_poids = df_nequa_.loc[max_poids_index].drop(columns=["COUNTRY_KEY"])
    finale_df_ = merge_and_update_classification(
        data, df_max_poids, product_id, class_id
    )
    display_data_with_download_button(
        finale_df_, title="Data with proposed changes more than 2"
    )
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def display_one_vs_one(df_nequa, data, product_id, class_id):
    """Handle products present in exactly two countries.

    Keeps the classification with the higher Weight and attaches a
    text-similarity score so the conflict can be reviewed manually.
    """
    # One-vs-one conflicts: product appears in exactly two countries.
    df_nequa_1 = df_nequa[df_nequa.total_by_product.apply(lambda x: int(x) == 2)]
    max_poids_index1 = df_nequa_1.groupby(product_id)["Weight"].idxmax()
    # `.drop()` returns a new frame instead of mutating the `.loc` slice
    # in place (avoids pandas' SettingWithCopyWarning).
    df_max_poids1 = df_nequa_1.loc[max_poids_index1].drop(columns=["COUNTRY_KEY"])

    finale_df_1 = add_text_similarity(
        merge_and_update_classification(data, df_max_poids1, product_id, class_id)
    )
    display_data_with_download_button(
        finale_df_1, title=" One vs One with similarity score"
    )
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
+
def app():
    """Entry point for the 'Gestion des ruptures sous famille' page.

    Loads the uploaded file, shows it with interactive filters, collects
    the processing parameters, and runs the pipeline on RUN.
    """
    logging.info("Starting app function")
    st.title("Gestion des ruptures sous famille")
    input_box = InputsBox()
    data = input_box.get_data()
    # Lazy %-style args: formatted only if the DEBUG level is enabled.
    logging.debug("Data retrieved: %s", data is not None)

    if data is not None and data.shape[0] != 0:
        logging.info("Data is valid, proceeding with processing")
        st.header("Data")
        st.dataframe(filter_dataframe(data))

        st.header("Parameters")
        (
            product_id,
            class_id,
            min_product_id,
            valid_class_id,
            nb_countries,
            proportion,
            countries,
            show_proportion,
        ) = display_filters(input_box)

        # Optional extra restriction coming from the holding-level filter
        # widget; an empty selection means "keep everything".
        list_product_selected = (
            filter_dataframe(data, "data_filter_by_holding")[product_id]
            .unique()
            .tolist()
        )
        if list_product_selected:  # truthy iff at least one product picked
            data_selected = data[data[product_id].isin(list_product_selected)]
        else:
            st.warning("No additional filter selected")
            data_selected = data.copy()

        if st.button("RUN", key="run_button"):
            try:
                process_data(
                    data_selected,
                    product_id,
                    class_id,
                    min_product_id,
                    valid_class_id,
                    nb_countries,
                    proportion,
                    countries,
                    show_proportion,
                )
                st.success("Done!", icon="✅")
                st.balloons()
            except Exception as e:
                # Surface pipeline failures to the user instead of crashing.
                st.error(f"An error occurred: {str(e)}", icon="🚨")
    else:
        logging.warning("Data is None or empty")
        st.info(
            """Ensure that column names are capitalized and that product_id
            and class_id descriptions are present, as well as a country
            column.""",
            icon="ℹ️",
        )
    logging.info("App function completed")
|
| 249 |
|
| 250 |
|
| 251 |
if __name__ == "__main__":
    # Configure the page first (must precede any other st.* call), expose
    # the reference links in the sidebar, then run the app.
    config_page()
    sidebar_links = [
        (
            "Documentation utilisateur",
            "https://docs.google.com/document/d/1WQwr5D87ZHSlBRWQw7KMbBhbEdFS4dlhltFDgZBNP4U/edit?usp=sharing",
        ),
        (
            "Example of input",
            "https://docs.google.com/spreadsheets/d/123hVTOFpBT-C6mCnrOBh8fFIhSi8FxiuyHZJAQu8bDc/edit#gid=1220891905",
        ),
    ]
    for label, url in sidebar_links:
        st.sidebar.markdown(
            f'<a href="{url}">{label}</a>',
            unsafe_allow_html=True,
        )
    app()
|