COULIBALY Bourahima commited on
Commit
2c49a88
·
1 Parent(s): 38b4487
.vscode/settings.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "DockerRun.DisableAutoGenerateConfig": true
3
+ }
App/class_input_box/input_box.py CHANGED
@@ -1,6 +1,5 @@
1
  import pandas as pd
2
  import streamlit as st
3
- from typing import Dict
4
 
5
 
6
  class InputsBox:
@@ -9,49 +8,82 @@ class InputsBox:
9
  self.columns = None
10
  self.product_id = None
11
  self.class_id = None
12
-
13
- def get_data(self) :
14
- uploaded_file = st.file_uploader("Choose a CSV file with the separator ';' ", type=["csv"])
15
-
 
 
16
  if uploaded_file is not None:
17
- #try :
18
- self.data = pd.read_csv(uploaded_file,dtype=str, sep=";", encoding="latin-1")
 
 
 
 
 
 
 
 
19
 
20
- #except :
21
- # self.data = pd.read_csv(uploaded_file,dtype=str, sep=";", encoding="utf-8" )
22
-
23
- self.columns = self.data.columns.tolist()
24
  return self.data
25
 
26
  def valid_produict_id(self) -> int:
27
- min_len = st.number_input("Minimum len of product_id", max_value= 25, min_value=1, value= 2, key = "pp")
 
 
 
 
 
 
28
  return min_len
29
-
30
  def valid_class_id(self) -> str:
31
- valid = st.text_input("First element of No valid class_id separed by ;")
 
 
32
  return valid.split(";")
33
 
34
  def get_product_id(self) -> str:
35
- self.product_id = st.selectbox("product_id (BARCODE)", options=self.columns, key="product_id")
 
 
36
  return self.product_id
37
 
38
-
39
  def get_class_id(self) -> str:
40
- self.class_id = st.selectbox("class_id (WW_CLASS_KEY)", options=self.columns, key="class_id")
 
 
41
  return self.class_id
42
-
43
  def get_countries(self) -> list:
44
- countries = st.multiselect("Select countries : " , tuple(self.data.COUNTRY_KEY.unique()), key = "countries")
 
 
 
 
45
  return countries
46
-
47
- def get_number_countries(self) -> int :
48
- nb_countries = st.number_input("Number of countries", min_value=1, max_value=20, value=1, key="Number of countries")
49
- return nb_countries
50
-
 
 
 
 
 
 
51
  def get_proportion(self) -> float:
52
- proportion = st.number_input("Proportion", min_value=0.10, max_value=1.00, value=0.75, key="proportion")
 
 
 
53
  return proportion
54
-
55
  def show_proportion(self) -> bool:
56
- show_condition = st.checkbox("Show data with ratios ", value=True, key="show_ratio_checkbox")
57
- return show_condition
 
 
 
1
  import pandas as pd
2
  import streamlit as st
 
3
 
4
 
5
  class InputsBox:
 
8
  self.columns = None
9
  self.product_id = None
10
  self.class_id = None
11
+
12
+ def get_data(self):
13
+ uploaded_file = st.file_uploader(
14
+ "Choose a CSV file with the separator ';' ", type=["csv"]
15
+ )
16
+
17
  if uploaded_file is not None:
18
+ # try :
19
+ self.data = pd.read_csv(
20
+ uploaded_file, dtype=str, sep=";", encoding="latin-1"
21
+ )
22
+
23
+ # except :
24
+ # self.data = pd.read_csv(
25
+ # uploaded_file,dtype=str,
26
+ # sep=";",
27
+ # encoding="utf-8" )
28
 
29
+ self.columns = self.data.columns.tolist()
 
 
 
30
  return self.data
31
 
32
  def valid_produict_id(self) -> int:
33
+ min_len = st.number_input(
34
+ "Minimum len of product_id",
35
+ max_value=25,
36
+ min_value=1,
37
+ value=2,
38
+ key="pp"
39
+ )
40
  return min_len
41
+
42
  def valid_class_id(self) -> str:
43
+ valid = st.text_input(
44
+ "First element of No valid class_id separed by ;"
45
+ )
46
  return valid.split(";")
47
 
48
  def get_product_id(self) -> str:
49
+ self.product_id = st.selectbox(
50
+ "product_id (BARCODE)", options=self.columns, key="product_id"
51
+ )
52
  return self.product_id
53
 
 
54
  def get_class_id(self) -> str:
55
+ self.class_id = st.selectbox(
56
+ "class_id (WW_CLASS_KEY)", options=self.columns, key="class_id"
57
+ )
58
  return self.class_id
59
+
60
  def get_countries(self) -> list:
61
+ countries = st.multiselect(
62
+ "Select countries : ",
63
+ tuple(self.data.COUNTRY_KEY.unique()),
64
+ key="countries",
65
+ )
66
  return countries
67
+
68
+ def get_number_countries(self) -> int:
69
+ nb_countries = st.number_input(
70
+ "Number of countries",
71
+ min_value=1,
72
+ max_value=20,
73
+ value=1,
74
+ key="Number of countries",
75
+ )
76
+ return nb_countries
77
+
78
  def get_proportion(self) -> float:
79
+ proportion = st.number_input(
80
+ "Proportion",
81
+ min_value=0.10, max_value=1.00, value=0.75, key="proportion"
82
+ )
83
  return proportion
84
+
85
  def show_proportion(self) -> bool:
86
+ show_condition = st.checkbox(
87
+ "Show data with ratios ", value=True, key="show_ratio_checkbox"
88
+ )
89
+ return show_condition
App/functions_rupture/functions_gestion.py CHANGED
@@ -1,174 +1,382 @@
1
  import numpy as np
2
  import pandas as pd
3
- import streamlit as st
4
- from App.utils.priorite_pays import *
 
5
  import nltk
6
- nltk.download('stopwords')
7
-
8
-
9
-
10
- def data_with_valide_key(data, product_id, class_id, min_prd_id, valid_class_id):
11
- data = data[data[product_id].str.len() > min_prd_id]
12
- try :
13
- data = data[~data[class_id].str[0].isin(valid_class_id)]
14
- except :
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  pass
16
- return data
17
-
18
 
19
- def calcul_total_par_ligne(data, produit_id, class_id):
20
 
21
- matrice = pd.crosstab(data[produit_id], data[class_id])
22
- total_by_line = matrice.sum(axis = 1)
23
-
24
- plus_k_2 = list(total_by_line[total_by_line.apply(lambda x : x > 1)].index)
25
- df = data[data[produit_id].isin(plus_k_2)]
26
- matrice = pd.crosstab(df[produit_id], df[class_id])
27
-
28
- total_by_line = matrice.sum(axis = 1)
29
- total_by_line = pd.DataFrame({produit_id : total_by_line.index , "total_by_line": total_by_line.values})
30
-
31
- return total_by_line, matrice
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
 
34
  @st.cache_data
35
- def matrice_creuse(matrice, produit_id, class_id):
36
- stacked = matrice.stack()
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  non_zero = stacked[stacked != 0]
38
- ligne = non_zero.index.get_level_values(0).astype(str).tolist()
39
- colonne = non_zero.index.get_level_values(1).astype(str).tolist()
40
- valeur = non_zero.values.tolist()
41
- sparse_matrix = pd.DataFrame({produit_id : ligne, class_id : colonne, "nombre": valeur})
 
 
 
42
  return sparse_matrix
43
 
44
 
45
  @st.cache_data
46
- def nouvelle_data( data, produit_id, class_id):
47
-
48
- total_by_line, matrice = calcul_total_par_ligne(data, produit_id, class_id)
49
- sparse_matrix = matrice_creuse( matrice, produit_id,class_id)
50
- Data = pd.merge(sparse_matrix, total_by_line, on =[produit_id])
51
- Data["Proportion"] = Data.nombre / Data.total_by_line
52
- merged = Data.merge(data, left_on=[class_id, produit_id], right_on=[class_id, produit_id])
53
- try :
54
- Country = merged.groupby([class_id, produit_id])['Country'].agg(lambda x: x.tolist())
55
- except :
56
- try :
57
- Country = merged.groupby([class_id, produit_id])['COUNTRY_KEY'].agg(lambda x: x.tolist())
58
- except :
59
- try :
60
- Country = merged.groupby([class_id, produit_id])['COUNTRY'].agg(lambda x: x.tolist())
61
- except :
62
- pass
63
- return Country, merged
64
-
65
-
66
- def add_country(produit_id, class_id, Country):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  return Country[produit_id, class_id]
68
 
69
 
70
- def finale_merged(merged, Country, produit_id, class_id):
71
- merged_finale = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  try:
73
- merged["Countries"] = merged.apply(lambda row: add_country(row[1], row[0], Country), axis=1)
74
- merged['Countries'] = merged['Countries'].apply(tuple)
75
- merged_finale = merged.drop_duplicates(subset=[produit_id, class_id, 'Countries'])
 
 
 
 
 
 
 
76
  except Exception as e:
77
- st.warning(f"Une erreur s'est produite : {e}")
78
- finally:
79
- return merged_finale
80
-
81
-
82
- def cond_pays_proportion(merged_finale, nb_pays, proportion, produit_id):
83
-
84
- data = merged_finale[((merged_finale.Proportion >= proportion) & (merged_finale.total_by_line >= nb_pays))]
85
- cles = data[produit_id].unique()
86
- df = merged_finale[merged_finale[produit_id].isin(cles)]
87
- return df
88
-
89
-
90
- def cond_pays_priorite(merged_finale, produit_id):
91
-
92
- data = merged_finale[((merged_finale.Proportion == 0.5) & (merged_finale.total_by_line >= 2))]
93
- cles = data[produit_id].unique()
94
- df = merged_finale[merged_finale[produit_id].isin(cles)]
95
-
96
- df.loc[:, "Poids"] = df["Countries"].apply(lambda x : np.sum([dico[y] for y in x]))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
- duplicated_subclass = df.duplicated(subset=[produit_id, "Poids"], keep=False)
99
- df_equa = df[duplicated_subclass]
100
- df_equa = df_equa[(df_equa.Proportion == 0.5)]
101
-
102
- df_nequa = df[~df.isin(df_equa)].dropna()
103
-
104
- return df, df_equa, df_nequa
105
-
106
-
107
- def finale_merge(data, new_data, produit_id, class_id):
108
-
109
- merged_df = pd.merge(data, new_data, on=["COUNTRY_KEY", produit_id], how="left", suffixes=("", "_y"))
110
- merged_df[class_id] = merged_df[f"{class_id}_y"].fillna(merged_df[class_id])
111
- merged_df[f"{class_id[:-4]}_DESC_FR"] = merged_df[f"{class_id[:-4]}_DESC_FR_y"].fillna(merged_df[f"{class_id[:-4]}_DESC_FR"])
112
-
113
- df_finale = merged_df[[produit_id, "COUNTRY_KEY",class_id, f"{class_id[:-4]}_DESC_FR"]]
114
-
115
- merged = pd.merge(data, df_finale, how='outer', indicator=True)
116
-
117
- data_finale = merged[merged['_merge'] != 'both']
118
-
119
- data_finale = data_finale.rename(columns={'_merge': 'Changements'})
120
-
121
- data_finale.sort_values(by =[produit_id], ascending=True, inplace =True)
122
-
123
- data_finale["Changements"] = data_finale["Changements"].apply(lambda x : "Avant" if x == "left_only" else "Après")
124
-
125
- data_finale = data_finale[[produit_id, "COUNTRY_KEY" , class_id, f"{class_id[:-4]}_DESC_FR", "Changements"]]
126
- data_finale.drop_duplicates(inplace=True)
127
-
128
- return data_finale, df_finale
129
-
130
- # brouillon
131
-
132
- def data_1_1(df_nequa, produit_id, class_id):
133
- df_nequa_2 = df_nequa[(df_nequa.Countries.apply(lambda x: len(x) > 1))]
134
- max_poids_index = df_nequa_2.groupby(produit_id)['Poids'].idxmax()
135
-
136
- # Updating columns for all rows instead of iterating over unique barcodes
137
- df_nequa_2.loc[:, class_id] = df_nequa_2.loc[max_poids_index, class_id].values
138
- df_nequa_2.loc[:, f'{class_id[:-4]}_DESC_FR'] = df_nequa_2.loc[max_poids_index, f'{class_id[:-4]}_DESC_FR'].values
139
 
140
- df_duplicate = df_nequa_2.copy()
141
- df_duplicate.Countries = df_duplicate.Countries.apply(lambda x : ','.join(x))
142
- new_rows = []
143
- for _, row in df_duplicate.iterrows():
144
- countries = row['Countries'].split(',')
145
- for country in countries:
146
- new_row = row.copy()
147
- new_row['Countries'] = country
148
- new_rows.append(new_row)
149
-
150
- new_df = pd.DataFrame(new_rows).drop_duplicates()
151
- new_df = new_df.rename(columns={'Countries': 'Country'}, errors='ignore')
152
 
153
- return new_df
 
 
 
 
154
 
 
 
 
 
155
 
156
- def data_1_FR(df, produit_id, class_id):
157
-
158
- df_f_f = df[df.Country == "FRA"]
 
 
159
 
160
- barcodes = df_f_f[produit_id].unique()
161
- max = 0
162
- Barcodes = []
163
  for barcode in barcodes:
164
- items = df_f_f.item_key[df_f_f[produit_id] == barcode].tolist()
165
  if len(items) == 2:
166
- Barcodes.append(barcode)
167
- if "R" in items[0]:
168
- df_f_f.loc[(df_f_f[produit_id] == barcode) & (df_f_f.item_key == items[0]), class_id] = df_f_f.loc[(df_f_f[produit_id] == barcode) & (df_f_f.item_key == items[1]), class_id].values
169
- df_f_f.loc[(df_f_f[produit_id] == barcode) & (df_f_f.item_key == items[0]), f'{class_id[:-3]}_DESC_FR'] = df_f_f.loc[(df_f_f[produit_id] == barcode) & (df_f_f.item_key == items[1]), f'{class_id[:-3]}_DESC_FR'].values
170
-
171
- if "R" in items[1]:
172
- df_f_f.loc[(df_f_f[produit_id] == barcode) & (df_f_f.item_key == items[1]), class_id] = df_f_f.loc[(df_f_f[produit_id] == barcode) & (df_f_f.item_key == items[0]), class_id].values
173
- df_f_f.loc[(df_f_f[produit_id] == barcode) & (df_f_f.item_key == items[1]), f'{class_id[:-3]}_DESC_FR'] = df_f_f.loc[(df_f_f[produit_id] == barcode) & (df_f_f.item_key == items[0]), f'{class_id[:-3]}_DESC_FR'].values
174
- return df_f_f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import numpy as np
2
  import pandas as pd
3
+ import streamlit as st
4
+ from App.utils.priorite_pays import dico
5
+ # from App.utils.divers_function import data_cleaning_func
6
  import nltk
7
+ from typing import Tuple, List
8
+
9
+ nltk.download("stopwords")
10
+
11
+
12
+ def filter_data_with_valid_keys(
13
+ data: pd.DataFrame,
14
+ product_id_col: str,
15
+ class_id_col: str,
16
+ min_product_id_length: int,
17
+ valid_class_id_prefixes: List[str],
18
+ ) -> pd.DataFrame:
19
+ """
20
+ Filter the dataframe based on product ID length and class ID prefixes.
21
+
22
+ Args:
23
+ data (pd.DataFrame): Input dataframe
24
+ product_id_col (str): Name of the product ID column
25
+ class_id_col (str): Name of the class ID column
26
+ min_product_id_length (int): Minimum length for product IDs
27
+ valid_class_id_prefixes (List[str]): List of valid prefixes for class IDs
28
+
29
+ Returns:
30
+ pd.DataFrame: Filtered dataframe
31
+ """
32
+ filtered_data = data[data[product_id_col].str.len() > min_product_id_length]
33
+ try:
34
+ filtered_data = filtered_data[
35
+ ~filtered_data[class_id_col].str[0].isin(valid_class_id_prefixes)
36
+ ]
37
+ except Exception:
38
  pass
39
+ return filtered_data
 
40
 
 
41
 
42
+ @st.cache_data
43
+ def calculate_product_class_matrix(
44
+ data: pd.DataFrame, product_id_col: str, class_id_col: str
45
+ ) -> Tuple[pd.DataFrame, pd.DataFrame]:
46
+ """
47
+ Calculate the product-class matrix and total counts per product.
48
+
49
+ Args:
50
+ data (pd.DataFrame): Input dataframe
51
+ product_id_col (str): Name of the product ID column
52
+ class_id_col (str): Name of the class ID column
53
+
54
+ Returns:
55
+ Tuple[pd.DataFrame, pd.DataFrame]: Total counts per product and product-class matrix
56
+ """
57
+ matrix = pd.crosstab(data[product_id_col], data[class_id_col])
58
+ total_by_product = matrix.sum(axis=1)
59
+
60
+ products_with_multiple_classes = total_by_product[total_by_product > 1].index
61
+ filtered_data = data[data[product_id_col].isin(products_with_multiple_classes)]
62
+ matrix = pd.crosstab(filtered_data[product_id_col], filtered_data[class_id_col])
63
+
64
+ total_by_product = matrix.sum(axis=1)
65
+ total_by_product_df = pd.DataFrame(
66
+ {
67
+ product_id_col: total_by_product.index,
68
+ "total_by_product": total_by_product.values,
69
+ }
70
+ )
71
+
72
+ return total_by_product_df, matrix
73
 
74
 
75
  @st.cache_data
76
+ def create_sparse_matrix(
77
+ matrix: pd.DataFrame, product_id_col: str, class_id_col: str
78
+ ) -> pd.DataFrame:
79
+ """
80
+ Create a sparse matrix representation from the product-class matrix.
81
+
82
+ Args:
83
+ matrix (pd.DataFrame): Product-class matrix
84
+ product_id_col (str): Name of the product ID column
85
+ class_id_col (str): Name of the class ID column
86
+
87
+ Returns:
88
+ pd.DataFrame: Sparse matrix representation
89
+ """
90
+ stacked = matrix.stack()
91
  non_zero = stacked[stacked != 0]
92
+ sparse_matrix = pd.DataFrame(
93
+ {
94
+ product_id_col: non_zero.index.get_level_values(0).astype(str),
95
+ class_id_col: non_zero.index.get_level_values(1).astype(str),
96
+ "count": non_zero.values,
97
+ }
98
+ )
99
  return sparse_matrix
100
 
101
 
102
  @st.cache_data
103
+ def process_new_data(
104
+ data: pd.DataFrame, product_id_col: str, class_id_col: str
105
+ ) -> Tuple[pd.Series, pd.DataFrame]:
106
+ """
107
+ Process the data to create a new dataset with country groups and merged information.
108
+
109
+ Args:
110
+ data (pd.DataFrame): Input dataframe
111
+ product_id_col (str): Name of the product ID column
112
+ class_id_col (str): Name of the class ID column
113
+
114
+ Returns:
115
+ Tuple[pd.Series, pd.DataFrame]: Country groups and merged dataframe
116
+ """
117
+ total_by_product_df, matrix = calculate_product_class_matrix(
118
+ data, product_id_col, class_id_col
119
+ )
120
+ sparse_matrix = create_sparse_matrix(matrix, product_id_col, class_id_col)
121
+ merged_data = pd.merge(sparse_matrix, total_by_product_df, on=[product_id_col])
122
+ merged_data["Proportion"] = merged_data["count"] / merged_data["total_by_product"]
123
+ final_merged = merged_data.merge(
124
+ data,
125
+ left_on=[class_id_col, product_id_col],
126
+ right_on=[class_id_col, product_id_col],
127
+ )
128
+ try:
129
+ country_groups = final_merged.groupby([class_id_col, product_id_col])[
130
+ "Country"
131
+ ].agg(lambda x: x.tolist())
132
+ except KeyError:
133
+ try:
134
+ country_groups = final_merged.groupby([class_id_col, product_id_col])[
135
+ "COUNTRY_KEY"
136
+ ].agg(lambda x: x.tolist())
137
+ except KeyError:
138
+ country_groups = final_merged.groupby([class_id_col, product_id_col])[
139
+ "COUNTRY"
140
+ ].agg(lambda x: x.tolist())
141
+ return country_groups, final_merged
142
+
143
+
144
+ def add_country(produit_id: str, class_id: str, Country) -> List[str]:
145
+ """
146
+ Retrieve the list of countries for a given product ID and class ID.
147
+
148
+ Args:
149
+ product_id (str): The product ID
150
+ class_id (str): The class ID
151
+ country_groups (pd.Series): Series containing country groups
152
+
153
+ Returns:
154
+ List[str]: List of countries for the given product and class
155
+ """
156
  return Country[produit_id, class_id]
157
 
158
 
159
+ def finalize_merged_data(
160
+ merged: pd.DataFrame,
161
+ country_groups: pd.Series,
162
+ product_id_col: str,
163
+ class_id_col: str,
164
+ ) -> pd.DataFrame:
165
+ """
166
+ Finalize the merged data by adding country information and removing duplicates.
167
+
168
+ Args:
169
+ merged (pd.DataFrame): Merged dataframe
170
+ country_groups (pd.Series): Series containing country groups
171
+ product_id_col (str): Name of the product ID column
172
+ class_id_col (str): Name of the class ID column
173
+
174
+ Returns:
175
+ pd.DataFrame: Finalized merged dataframe
176
+ """
177
  try:
178
+ merged["Countries"] = merged.apply(
179
+ lambda row: add_country(
180
+ row[1], row[0], country_groups
181
+ ),
182
+ axis=1,
183
+ )
184
+ merged["Countries"] = merged["Countries"].apply(tuple)
185
+ final_merged = merged.drop_duplicates(
186
+ subset=[product_id_col, class_id_col, "Countries"]
187
+ )
188
  except Exception as e:
189
+ st.warning(f"An error occurred: {e}")
190
+ final_merged = None
191
+ return final_merged
192
+
193
+
194
+ def filter_by_country_and_proportion(
195
+ merged_data: pd.DataFrame,
196
+ min_countries: int,
197
+ min_proportion: float,
198
+ product_id_col: str,
199
+ ) -> pd.DataFrame:
200
+ """
201
+ Filter the merged data based on minimum number of countries and proportion.
202
+
203
+ Args:
204
+ merged_data (pd.DataFrame): Merged dataframe
205
+ min_countries (int): Minimum number of countries required
206
+ min_proportion (float): Minimum proportion required
207
+ product_id_col (str): Name of the product ID column
208
+
209
+ Returns:
210
+ pd.DataFrame: Filtered dataframe
211
+ """
212
+ filtered_data = merged_data[
213
+ (merged_data.Proportion >= min_proportion)
214
+ & (merged_data.total_by_product >= min_countries)
215
+ ]
216
+ product_keys = filtered_data[product_id_col].unique()
217
+ result_df = merged_data[merged_data[product_id_col].isin(product_keys)]
218
+ return result_df
219
+
220
+
221
+ def process_country_priority(
222
+ merged_data: pd.DataFrame, product_id_col: str
223
+ ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
224
+ """
225
+ Process the merged data based on country priority.
226
+
227
+ Args:
228
+ merged_data (pd.DataFrame): Merged dataframe
229
+ product_id_col (str): Name of the product ID column
230
+
231
+ Returns:
232
+ Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: Processed dataframes (all, equal weight, non-equal weight)
233
+ """
234
+ data = merged_data[
235
+ (merged_data.Proportion == 0.5) & (merged_data.total_by_product >= 2)
236
+ ]
237
+ product_keys = data[product_id_col].unique()
238
+ df = merged_data[merged_data[product_id_col].isin(product_keys)]
239
+
240
+ df["Weight"] = df["Countries"].apply(lambda x: sum(dico[y] for y in x))
241
+
242
+ duplicated_subclass = df.duplicated(subset=[product_id_col, "Weight"], keep=False)
243
+ df_equal = df[duplicated_subclass & (df.Proportion == 0.5)]
244
+ df_not_equal = df[~df.isin(df_equal)].dropna()
245
+
246
+ return df, df_equal, df_not_equal
247
+
248
+
249
+ def merge_final_data(
250
+ original_data: pd.DataFrame,
251
+ new_data: pd.DataFrame,
252
+ product_id_col: str,
253
+ class_id_col: str,
254
+ ) -> Tuple[pd.DataFrame, pd.DataFrame]:
255
+ """
256
+ Merge the original data with the new processed data.
257
+
258
+ Args:
259
+ original_data (pd.DataFrame): Original dataframe
260
+ new_data (pd.DataFrame): New processed dataframe
261
+ product_id_col (str): Name of the product ID column
262
+ class_id_col (str): Name of the class ID column
263
+
264
+ Returns:
265
+ Tuple[pd.DataFrame, pd.DataFrame]: Final merged data and changes summary
266
+ """
267
+ merged_df = pd.merge(
268
+ original_data,
269
+ new_data,
270
+ on=["COUNTRY_KEY", product_id_col],
271
+ how="left",
272
+ suffixes=("", "_y"),
273
+ )
274
+ merged_df[class_id_col] = merged_df[f"{class_id_col}_y"].fillna(
275
+ merged_df[class_id_col]
276
+ )
277
+ merged_df[f"{class_id_col[:-4]}_DESC_FR"] = merged_df[
278
+ f"{class_id_col[:-4]}_DESC_FR_y"
279
+ ].fillna(merged_df[f"{class_id_col[:-4]}_DESC_FR"])
280
+
281
+ df_final = merged_df[
282
+ [product_id_col, "COUNTRY_KEY", class_id_col, f"{class_id_col[:-4]}_DESC_FR"]
283
+ ]
284
+
285
+ merged = pd.merge(original_data, df_final, how="outer", indicator=True)
286
+ data_final = merged[merged["_merge"] != "both"]
287
+ data_final = data_final.rename(columns={"_merge": "Changes"})
288
+ data_final.sort_values(by=[product_id_col], ascending=True, inplace=True)
289
+ data_final["Changes"] = data_final["Changes"].apply(
290
+ lambda x: "Before" if x == "left_only" else "After"
291
+ )
292
+ data_final = data_final[
293
+ [
294
+ product_id_col,
295
+ "COUNTRY_KEY",
296
+ class_id_col,
297
+ f"{class_id_col[:-4]}_DESC_FR",
298
+ "Changes",
299
+ ]
300
+ ]
301
+ data_final.drop_duplicates(inplace=True)
302
+
303
+ return data_final, df_final
304
+
305
+
306
+ def process_non_equal_data(
307
+ df_not_equal: pd.DataFrame, product_id_col: str, class_id_col: str
308
+ ) -> pd.DataFrame:
309
+ """
310
+ Process data with non-equal weights, selecting the classification with the highest weight.
311
+
312
+ Args:
313
+ df_not_equal (pd.DataFrame): Dataframe with non-equal weights
314
+ product_id_col (str): Name of the product ID column
315
+ class_id_col (str): Name of the class ID column
316
+
317
+ Returns:
318
+ pd.DataFrame: Processed dataframe with selected classifications
319
+ """
320
+ df_multi_country = df_not_equal[df_not_equal.Countries.apply(len) > 1]
321
+ max_weight_index = df_multi_country.groupby(product_id_col)["Weight"].idxmax()
322
+
323
+ df_multi_country.loc[:, [class_id_col, f"{class_id_col[:-4]}_DESC_FR"]] = (
324
+ df_multi_country.loc[
325
+ max_weight_index, [class_id_col, f"{class_id_col[:-4]}_DESC_FR"]
326
+ ].values
327
+ )
328
+
329
+ df_duplicate = df_multi_country.copy()
330
+ df_duplicate.Countries = df_duplicate.Countries.str.join(",")
331
+
332
+ new_df = (
333
+ df_duplicate.explode("Countries")
334
+ .rename(columns={"Countries": "Country"})
335
+ .drop_duplicates()
336
+ )
337
 
338
+ return new_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
 
 
 
 
 
 
 
 
 
 
 
 
 
340
 
341
+ def process_france_data(
342
+ df: pd.DataFrame, product_id_col: str, class_id_col: str
343
+ ) -> pd.DataFrame:
344
+ """
345
+ Process data specific to France, handling special cases for item keys.
346
 
347
+ Args:
348
+ df (pd.DataFrame): Input dataframe
349
+ product_id_col (str): Name of the product ID column
350
+ class_id_col (str): Name of the class ID column
351
 
352
+ Returns:
353
+ pd.DataFrame: Processed dataframe for France
354
+ """
355
+ df_france = df[df.Country == "FRA"]
356
+ barcodes = df_france[product_id_col].unique()
357
 
 
 
 
358
  for barcode in barcodes:
359
+ items = df_france.item_key[df_france[product_id_col] == barcode].tolist()
360
  if len(items) == 2:
361
+ if "R" in items[0]:
362
+ df_france.loc[
363
+ (df_france[product_id_col] == barcode)
364
+ & (df_france.item_key == items[0]),
365
+ [class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
366
+ ] = df_france.loc[
367
+ (df_france[product_id_col] == barcode)
368
+ & (df_france.item_key == items[1]),
369
+ [class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
370
+ ].values
371
+ elif "R" in items[1]:
372
+ df_france.loc[
373
+ (df_france[product_id_col] == barcode)
374
+ & (df_france.item_key == items[1]),
375
+ [class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
376
+ ] = df_france.loc[
377
+ (df_france[product_id_col] == barcode)
378
+ & (df_france.item_key == items[0]),
379
+ [class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
380
+ ].values
381
+
382
+ return df_france
App/utils/divers_function.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
  import re
4
  from sklearn.feature_extraction.text import CountVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
-
7
  from App.utils.standadisation import *
8
  from nltk.corpus import stopwords
9
  from nltk.stem import PorterStemmer
@@ -13,82 +13,123 @@ from nltk.corpus import stopwords
13
 
14
  @st.cache_data
15
  def convert_df(df):
16
- return df.to_csv().encode('utf-8')
 
17
 
18
  @st.cache_data
19
  def supprime_country(df):
20
- try :
21
- df.drop(["Country"], axis = 1, inplace = True)
22
- except :
23
- try :
24
- df.drop(["COUNTRY_KEY"], axis = 1, inplace = True)
25
- except :
26
- try :
27
- df.drop(["COUNTRY"], axis = 1, inplace = True)
28
- except :
29
- pass
30
  return df
31
 
32
 
33
- def Merger(df, data_tr, produit_id, class_id):
34
- keys = data_tr[produit_id].unique()
35
- df_finale_v1 = df[df[produit_id].isin(keys)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- df_finale_v1.loc[:,f'old_{class_id}'] = df_finale_v1.loc[:,class_id]
 
 
 
 
 
 
 
 
38
 
39
- merged_df = pd.merge(df_finale_v1, data_tr, on=[produit_id], how='inner', indicator=True)
 
 
 
40
 
41
- merged_df[class_id] = merged_df[f'{class_id}_x'].fillna(merged_df[f'old_{class_id}'])
42
-
43
- # Filtrer les lignes où 'class_id' a été modifié
44
- merged_df = merged_df[merged_df[f'{class_id}_x'] != merged_df[f'{class_id}_y']]
45
- finale_df = merged_df.drop(["_merge"], axis = 1) #[["COUNTRY_KEY" ,produit_id,"ITEM_DESC_x",f"old_{class_id}",f'{class_id[:-4]}_DESC_FR_x', f'{class_id}_y', f'{class_id[:-4]}_DESC_FR_y',"ITEM_DESC_y","nombre","total_by_line", "Proportion", "Countries","Poids"]]
46
- return finale_df
47
 
 
 
48
 
 
49
 
50
- def data_cleaning(strings):
 
51
 
52
  strings = strings.lower().strip()
53
- strings = strings.replace('\'',' ')
54
- strings = strings.replace('/',' ')
55
- strings = re.sub(r'[^\w\s]', ' ', strings)
56
- text_normalized = re.sub('[^A-Za-z ,éêèîôœàâ]+', ' ', strings)
57
 
58
  return text_normalized
59
 
60
 
61
- def standardization(strings):
62
- liste = strings.split(' ')
63
- for i in range(len(liste)) :
64
- if liste[i] in dictionnaire.keys():
65
- liste[i] = dictionnaire[liste[i]]
66
- return ' '.join(liste)
67
 
68
 
69
- def remove_stop_words(strings):
70
  liste_stopword_unicode = [str(item) for item in liste_stopword]
71
- en_stops = set(stopwords.words('english') + liste_stopword_unicode)
72
- fr_stops = set(stopwords.words('french') + liste_stopword_unicode)
73
 
74
- list_DESCRIPTION = strings.split(' ')
75
  cleaned_list = []
76
 
77
  for ingredient in list_DESCRIPTION:
78
- temp = ingredient.split(' ')
79
- cleaned_ingredient = ' '.join([word for word in temp if word.lower() not in en_stops])
 
 
80
  cleaned_list.append(cleaned_ingredient)
81
 
82
- strings = ' '.join([ingredient for ingredient in cleaned_list])
83
- list_DESCRIPTION = strings.split(' ')
84
  cleaned_list = []
85
 
86
  for ingredient in list_DESCRIPTION:
87
- temp = ingredient.split(' ')
88
- cleaned_ingredient = ' '.join([word for word in temp if word.lower() not in fr_stops])
 
 
89
  cleaned_list.append(cleaned_ingredient)
90
 
91
- strings = ' '.join([ingredient for ingredient in cleaned_list])
92
  return strings
93
 
94
 
@@ -97,27 +138,31 @@ fr_stemmer = FrenchStemmer()
97
 
98
 
99
  def stem_sentence(sentence, stemmer):
100
- words = sentence.split(' ')
101
  stemmed_words = [stemmer.stem(word) for word in words]
102
- stemmed_sentence = ' '.join(stemmed_words)
103
  return stemmed_sentence
104
 
105
 
106
  def english_stemmer(strings):
107
- list_ingredients = strings.split(' ')
108
- stemmed_list = [stem_sentence(ingredient, en_stemmer) for ingredient in list_ingredients]
109
- strings = ' '.join(stemmed_list)
 
 
110
  return strings
111
 
112
 
113
  def french_stemmer(strings):
114
- list_ingredients = strings.split(',')
115
- stemmed_list = [stem_sentence(ingredient, fr_stemmer) for ingredient in list_ingredients]
116
- strings = ' '.join(stemmed_list)
 
 
117
  return strings
118
 
119
 
120
- def cosine_similarity_between_expressions(expr1, expr2):
121
 
122
  vectorizer = CountVectorizer()
123
  vectors = vectorizer.fit_transform([expr1, expr2])
@@ -125,35 +170,88 @@ def cosine_similarity_between_expressions(expr1, expr2):
125
 
126
  return similarity[0][0]
127
 
128
- def ajout_simularite(data) :
129
- data["ITEM_DESC_avant_clean"] = data["ITEM_DESC_x"].apply(data_cleaning)
130
- data["ITEM_DESC_apres_clean"] = data["ITEM_DESC_y"].apply(data_cleaning)
131
-
132
- stop = stopwords.words('french')
133
- data['ITEM_DESC_avant_clean'] = data['ITEM_DESC_avant_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
134
- data['ITEM_DESC_apres_clean'] = data['ITEM_DESC_apres_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
135
-
136
- stop = stopwords.words('english')
137
- data['ITEM_DESC_avant_clean'] = data['ITEM_DESC_avant_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
138
- data['ITEM_DESC_apres_clean'] = data['ITEM_DESC_apres_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
139
 
140
- data['ITEM_DESC_avant_clean'] = data['ITEM_DESC_avant_clean'].apply(remove_stop_words)
141
- data['ITEM_DESC_apres_clean'] = data['ITEM_DESC_apres_clean'].apply(remove_stop_words)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- data['ITEM_DESC_avant_clean'] = data['ITEM_DESC_avant_clean'].apply(standardization)
144
- data['ITEM_DESC_apres_clean'] = data['ITEM_DESC_apres_clean'].apply(standardization)
145
-
146
- data["Cosinus similarité"] = data.apply(lambda row: cosine_similarity_between_expressions(row['ITEM_DESC_apres_clean'], row['ITEM_DESC_avant_clean']), axis=1)
147
-
148
- return data
149
 
150
 
151
- def display_data_with_download_button(df, title="Data without decision-making"):
152
- if df.empty :
 
 
 
153
  st.write("No result for the above criterion ")
154
- else :
155
  st.subheader(title)
156
  df.loc[:, "Evaluation"] = True
157
  edited_df = st.data_editor(df)
158
  csv_data = convert_df(edited_df)
159
- st.download_button(label="Download data as CSV", data=csv_data, file_name=f'{title}.csv', mime='text/csv')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import re
4
  from sklearn.feature_extraction.text import CountVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
+ from typing import Callable
7
  from App.utils.standadisation import *
8
  from nltk.corpus import stopwords
9
  from nltk.stem import PorterStemmer
 
13
 
14
@st.cache_data
def convert_df(df):
    """Serialize *df* to CSV (index included) and return the UTF-8 bytes.

    Cached by Streamlit so repeated downloads of the same frame do not
    re-serialize it.
    """
    csv_text = df.to_csv()
    return csv_text.encode("utf-8")
17
+
18
 
19
@st.cache_data
def supprime_country(df):
    """Drop the first country column found in *df* (in place) and return it.

    Depending on the uploaded file the column may be called "Country",
    "COUNTRY_KEY" or "COUNTRY". Only the first match is removed, mirroring
    the original try/except cascade; if none is present the frame is
    returned untouched. The explicit membership check replaces three nested
    bare ``except:`` clauses, which also swallowed unrelated errors.
    """
    for column in ("Country", "COUNTRY_KEY", "COUNTRY"):
        if column in df.columns:
            df.drop([column], axis=1, inplace=True)
            break  # original behaviour: stop after the first successful drop
    return df
32
 
33
 
34
def merge_and_update_classification(
    main_df, update_df, product_id_col, classification_col
):
    """
    Merge two DataFrames and update the classification based on the update_df.
    Only rows where the classification has changed are retained.

    Args:
        main_df (pd.DataFrame): The main DataFrame containing original data.
        update_df (pd.DataFrame): DataFrame containing updated classifications.
        product_id_col (str): Name of the column used as the product identifier.
        classification_col (str): Name of the classification column to be updated.

    Returns:
        pd.DataFrame: A DataFrame containing only the rows where classification
        was updated (suffixed ``_main``/``_update`` columns are kept alongside
        the resolved ``classification_col``).
    """
    # Restrict the main data to products that actually appear in the update.
    update_product_ids = update_df[product_id_col].unique()
    # .copy() so the snapshot assignment below writes to an independent frame
    # instead of a view of main_df (avoids SettingWithCopyWarning and any
    # accidental mutation of the caller's data).
    filtered_main_df = main_df[
        main_df[product_id_col].isin(update_product_ids)
    ].copy()

    # Preserve the original classification for the fillna fallback below.
    original_classification_col = f"original_{classification_col}"
    filtered_main_df[original_classification_col] = filtered_main_df[
        classification_col
    ]

    # Inner merge: one row per product present on both sides; the colliding
    # classification column is suffixed _main / _update.
    merged_df = pd.merge(
        filtered_main_df,
        update_df,
        on=[product_id_col],
        how="inner",
        suffixes=("_main", "_update"),
        indicator=True,
    )

    # Resolved classification: take the update, fall back to the original
    # where the update is NaN.
    merged_df[classification_col] = merged_df[f"{classification_col}_update"].fillna(
        merged_df[original_classification_col]
    )

    # Keep only rows where classification has changed.
    # NOTE(review): a NaN update value compares unequal to the main value, so
    # such rows are retained even though the resolved classification equals
    # the original — confirm this is intended.
    updated_df = merged_df[
        merged_df[f"{classification_col}_main"]
        != merged_df[f"{classification_col}_update"]
    ]

    # The merge indicator is an implementation detail; drop it before return.
    final_df = updated_df.drop(columns=["_merge"])

    return final_df
85
 
86
+
87
def data_cleaning_func(strings):
    """Normalize a raw product description for text comparison.

    Lower-cases and strips the input, turns apostrophes and slashes into
    spaces, replaces remaining punctuation with spaces, and finally keeps
    only Latin letters, spaces, commas and a few French accented characters.
    """
    text = strings.lower().strip()
    for separator in ("'", "/"):
        text = text.replace(separator, " ")
    # Any non word/whitespace character becomes a space.
    text = re.sub(r"[^\w\s]", " ", text)
    # Keep only the allowed alphabet (drops digits, underscores, etc.).
    return re.sub("[^A-Za-z ,éêèîôœàâ]+", " ", text)
96
 
97
 
98
def standardization_func(strings):
    """Replace known abbreviations in *strings* with their canonical words.

    Tokens are split on single spaces and looked up in the module-level
    ``dictionnaire``; unknown tokens pass through unchanged.
    """
    tokens = strings.split(" ")
    normalized = [dictionnaire.get(token, token) for token in tokens]
    return " ".join(normalized)
104
 
105
 
106
def remove_stop_words_func(strings):
    """Strip English then French stop words (plus custom units) from *strings*.

    The custom ``liste_stopword`` tokens are merged into both NLTK stop-word
    sets. Tokens are matched case-insensitively; removed tokens leave an
    empty slot, so extra spaces may remain in the result (same behaviour as
    the original two-pass implementation).
    """
    custom_stops = [str(item) for item in liste_stopword]
    en_stops = set(stopwords.words("english") + custom_stops)
    fr_stops = set(stopwords.words("french") + custom_stops)

    def _strip(text, stops):
        # Mirror the original structure: outer split into "ingredients",
        # inner split into words, filtered against the stop set.
        kept = []
        for ingredient in text.split(" "):
            words = ingredient.split(" ")
            kept.append(
                " ".join(word for word in words if word.lower() not in stops)
            )
        return " ".join(kept)

    # English pass first, then French, exactly as before.
    strings = _strip(strings, en_stops)
    return _strip(strings, fr_stops)
134
 
135
 
 
138
 
139
 
140
def stem_sentence(sentence, stemmer):
    """Stem every space-separated word of *sentence* with *stemmer* and rejoin."""
    return " ".join(stemmer.stem(word) for word in sentence.split(" "))
145
 
146
 
147
def english_stemmer(strings):
    """Apply the module-level English stemmer to each space-separated chunk."""
    chunks = strings.split(" ")
    stemmed = [stem_sentence(chunk, en_stemmer) for chunk in chunks]
    return " ".join(stemmed)
154
 
155
 
156
def french_stemmer(strings):
    """Apply the module-level French stemmer to each comma-separated chunk.

    NOTE(review): this splits on ',' whereas english_stemmer splits on ' ' —
    confirm the comma separator is intentional for the French inputs.
    """
    chunks = strings.split(",")
    stemmed = [stem_sentence(chunk, fr_stemmer) for chunk in chunks]
    return " ".join(stemmed)
163
 
164
 
165
+ def cosine_similarity_func(expr1, expr2):
166
 
167
  vectorizer = CountVectorizer()
168
  vectors = vectorizer.fit_transform([expr1, expr2])
 
170
 
171
  return similarity[0][0]
172
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
def add_text_similarity(
    df: pd.DataFrame,
    data_cleaning_func: Callable = data_cleaning_func,
    remove_stop_words_func: Callable = remove_stop_words_func,
    standardization_func: Callable = standardization_func,
    cosine_similarity_func: Callable = cosine_similarity_func,
) -> pd.DataFrame:
    """
    Add text similarity measures to the DataFrame based on item descriptions.

    Args:
        df (pd.DataFrame): Input DataFrame containing item descriptions.
        data_cleaning_func (Callable): Function to clean the text data.
        remove_stop_words_func (Callable): Function to remove stop words.
        standardization_func (Callable): Function to standardize text.
        cosine_similarity_func (Callable): Function to calculate cosine similarity.

    Returns:
        pd.DataFrame: DataFrame with added text similarity measures (the input
        frame is modified in place and also returned).
    """
    before_col, after_col = "ITEM_DESC_before_clean", "ITEM_DESC_after_clean"

    # Clean both raw description columns.
    df[before_col] = df["ITEM_DESC_main"].apply(data_cleaning_func)
    df[after_col] = df["ITEM_DESC_update"].apply(data_cleaning_func)

    # NLTK stop-word removal — French first, then English (original order).
    for language in ("french", "english"):
        stop_words = set(stopwords.words(language))
        for col in (before_col, after_col):
            df[col] = df[col].apply(
                lambda text: " ".join(
                    word for word in text.split() if word.lower() not in stop_words
                )
            )

    # Custom stop-word removal followed by abbreviation standardization.
    for col in (before_col, after_col):
        df[col] = df[col].apply(remove_stop_words_func)
        df[col] = df[col].apply(standardization_func)

    # Row-wise cosine similarity between the cleaned after/before texts.
    df["Cosine_Similarity"] = df.apply(
        lambda row: cosine_similarity_func(row[after_col], row[before_col]),
        axis=1,
    )

    return df
 
 
 
 
 
229
 
230
 
231
def display_data_with_download_button(
    df,
    title="Data without decision-making"
) -> None:
    """Render *df* in an editable table with a CSV download button.

    Args:
        df: DataFrame to display; an "Evaluation" column is added in place.
        title: Section subheader, CSV file name, and widget key.

    Shows a placeholder message when *df* is empty. The same title can be
    rendered twice on one page, which makes Streamlit raise a duplicate
    widget-key error; in that case the button is retried with a unique key.
    """
    if df.empty:
        st.write("No result for the above criterion ")
        return

    st.subheader(title)
    df.loc[:, "Evaluation"] = True
    edited_df = st.data_editor(df)
    csv_data = convert_df(edited_df)

    def _download(widget_key):
        # Single definition of the button so both attempts stay in sync
        # (the original duplicated the whole call in try and except).
        st.download_button(
            label="Download data as CSV",
            data=csv_data,
            file_name=f"{title}.csv",
            mime="text/csv",
            key=widget_key,
        )

    try:
        _download(title)
    except Exception:
        # Narrowed from a bare `except:`; retry with a de-duplicated key.
        _download(title + "1")
App/utils/filter_dataframe.py CHANGED
@@ -1,4 +1,5 @@
1
  import pandas as pd
 
2
  import streamlit as st
3
  from pandas.api.types import (
4
  is_categorical_dtype,
@@ -8,8 +9,7 @@ from pandas.api.types import (
8
  )
9
 
10
 
11
-
12
- def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
13
  """
14
  Adds a UI on top of a dataframe to let viewers filter columns
15
 
@@ -19,7 +19,10 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
19
  Returns:
20
  pd.DataFrame: Filtered dataframe
21
  """
22
- modify = st.checkbox("Add filters")
 
 
 
23
 
24
  if not modify:
25
  return df
@@ -30,7 +33,7 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
30
  for col in df.columns:
31
  if is_object_dtype(df[col]):
32
  try:
33
- df[col] = pd.to_datetime(df[col])
34
  except Exception:
35
  pass
36
 
@@ -40,7 +43,11 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
40
  modification_container = st.container()
41
 
42
  with modification_container:
43
- to_filter_columns = st.multiselect("Filter dataframe on", df.columns)
 
 
 
 
44
  for column in to_filter_columns:
45
  left, right = st.columns((1, 20))
46
  left.write("↳")
@@ -73,7 +80,12 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
73
  ),
74
  )
75
  if len(user_date_input) == 2:
76
- user_date_input = tuple(map(pd.to_datetime, user_date_input))
 
 
 
 
 
77
  start_date, end_date = user_date_input
78
  df = df.loc[df[column].between(start_date, end_date)]
79
  else:
@@ -81,6 +93,6 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
81
  f"Substring or regex in {column}",
82
  )
83
  if user_text_input:
84
- df = df[df[column].str.contains(user_text_input)]
85
 
86
  return df
 
1
  import pandas as pd
2
+ import numpy as np
3
  import streamlit as st
4
  from pandas.api.types import (
5
  is_categorical_dtype,
 
9
  )
10
 
11
 
12
+ def filter_dataframe(df: pd.DataFrame, key: str = "filter_dataframe_on") -> pd.DataFrame:
 
13
  """
14
  Adds a UI on top of a dataframe to let viewers filter columns
15
 
 
19
  Returns:
20
  pd.DataFrame: Filtered dataframe
21
  """
22
+ modify = st.checkbox(
23
+ "Add filters",
24
+ key=key + "checkbox"
25
+ )
26
 
27
  if not modify:
28
  return df
 
33
  for col in df.columns:
34
  if is_object_dtype(df[col]):
35
  try:
36
+ df[col] = pd.to_datetime(df[col], format='%Y-%m-%d %H:%M:%S')
37
  except Exception:
38
  pass
39
 
 
43
  modification_container = st.container()
44
 
45
  with modification_container:
46
+ to_filter_columns = st.multiselect(
47
+ "Filter dataframe on",
48
+ df.columns,
49
+ key=key + "multiselect"
50
+ )
51
  for column in to_filter_columns:
52
  left, right = st.columns((1, 20))
53
  left.write("↳")
 
80
  ),
81
  )
82
  if len(user_date_input) == 2:
83
+ user_date_input = tuple(
84
+ map(
85
+ pd.to_datetime,
86
+ user_date_input
87
+ )
88
+ )
89
  start_date, end_date = user_date_input
90
  df = df.loc[df[column].between(start_date, end_date)]
91
  else:
 
93
  f"Substring or regex in {column}",
94
  )
95
  if user_text_input:
96
+ df = df[df[column].astype(str).str.contains(user_text_input, case=False, na=False)]
97
 
98
  return df
App/utils/priorite_pays.py CHANGED
@@ -1,10 +1,54 @@
1
-
2
-
3
- pays_all = ["FRA", "BEL" ,"ESP","ITA", "BRA","ATA","ARG", "POL", "ROU", "BIG","SAM",
4
- "UAE","SAU","KWT","OMN","BHR","QAT","JOR","EGY","ARM","UZB","IRN","KEN","GEO","LEB","UGA","PAK","IRQ",
5
- "MTQ","GLP","REU","GUA","MTS","GLS","GUF","MTA","GLA","GUS","SXM","DOM",
6
- "MAR","AMA","TUN","DZA","TUR","IAP","IET","TWN"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  dico = {}
9
  for i in pays_all:
10
- dico[i] = len(pays_all) - pays_all.index(i)
 
1
# Countries ordered by decreasing priority: the first entry gets the highest
# weight, the last entry gets weight 1.
pays_all = [
    "FRA", "BEL", "ESP", "ITA", "BRA", "ATA", "ARG", "POL", "ROU", "BIG",
    "SAM", "UAE", "SAU", "KWT", "OMN", "BHR", "QAT", "JOR", "EGY", "ARM",
    "UZB", "IRN", "KEN", "GEO", "LEB", "UGA", "PAK", "IRQ", "MTQ", "GLP",
    "REU", "GUA", "MTS", "GLS", "GUF", "MTA", "GLA", "GUS", "SXM", "DOM",
    "MAR", "AMA", "TUN", "DZA", "TUR", "IAP", "IET", "TWN",
]

# Country -> priority weight (len(pays_all) for the first entry down to 1 for
# the last). enumerate() replaces the original O(n^2) pays_all.index(i)
# lookup inside the loop; the mapping is identical because all codes are
# unique.
dico = {country: len(pays_all) - rank for rank, country in enumerate(pays_all)}
App/utils/standadisation.py CHANGED
@@ -1,17 +1,125 @@
1
- dictionnaire = {"rg": "rouge","rges" : "rouge","rge": "rouge", "rse": "rose" ,"rs" : "rose", "bl": "blanc", "bdx": "Bordeaux",
2
- "vdt": "vin de table", 'vdp': "vin de pays","blc": "blanc", "bib": "bag in box", "citr": "citron", "co": "coco", "gourm" : "gourmand",
3
- "patis": "patisserie", "p'tits" : "petit", "p'tit": "petit","p tit": "petit", "pt": "pepite", "rev": "revil","succ": "sucettes",
4
- "succet": "sucettes", "chocohouse": "choco house", "sach": "sachet", "choc": "choco", "tab" : "tablette", "hte" : "haute",
5
- "spagh" : "spaghetti", "scht": "sachet", "nr": "noir", "caf": "cafe","barr": "barre", "pces": "pieces","pc": "pieces", "acidu": "acidule","blnc": "blanc",
6
- "frui" : "fruit", "gourman" : "gourmand","bte" : "boîte", "bt" : "boîte", "ptit": "petit", "corb": "corbeil","ptits": "petit", "pti": "petit", "nois": "noisette",
7
- "poul": "poulain", "barq" : "barquette", "barqu" : "barquette", 'fizz': 'fizzy', "st": "saint", "mich": "michel", "cal" : "calendrier", "calend" : "calendrier",
8
- "calendr" : "calendrier", "caram" : "caramel", "cava" : "cavalier", "har" : "haribo", 'choc' : "chocolat", "choco" :"chocolat", 'lt' : "lait", "choc'n" :"chocolat noir",
9
- "choc n" :"chocolat noir", "degust" : "degustation", "degus" : "degustation", "bis" : "biscuit", "coffr" : "coffret", "coff" : "coffret", "conf" : "confiserie",
10
- "confis" : "confiserie", "croco" : "crocodile", "dble" : "double", "dess" : "dessert", "doyp" : "doypack", "harib" : "harib" , "et" : "etui", "exc" : "excellence",
11
- "excel" : "excellence", "frit" : "friture","fritu" : "friture","fritur" : "friture", "gd" : "grand", "gr" : "grand", "grd" : "grand", "grchoc" : "grand chocolat", "lat" : "lait", 'ass' : "assorti", "assoti" :"assorti",
12
- "noug" : "nougatine", "nougat" : "nougatine", "scht" : "sachet", "sct" : "secret", "cho" : "chocolat" , "bisc" : "biscuit", "am" : "amande", "liq" : "liqueur", "tabl" : "tablette","asst":"assorti",
13
- "tab" : "tablette", "bil" : "bille", "vali" : "valisette", "cda" : "chevaliers d argouges", "tub": "tubo", "gril" :"grille", "amandesgrilles" : "amandes grilles", "ball" : "ballotin",
14
- "piecestubo" : "pieces tubo"
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- liste_stopword = ['oz', 'kg', 'g', 'lb', 'mg', 'l', 'cl', 'ml', 'tsp', 'tbsp', 'cm', 'x', 'cte', 'h',"unknown"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Abbreviation -> canonical-word mapping used to standardize product
# descriptions before similarity scoring. The original literal repeated the
# keys "scht" and "tab" with identical values; the duplicates are removed
# here, which leaves the resulting dict unchanged.
dictionnaire = {
    "rg": "rouge",
    "rges": "rouge",
    "rge": "rouge",
    "rse": "rose",
    "rs": "rose",
    "bl": "blanc",
    "bdx": "Bordeaux",
    "vdt": "vin de table",
    "vdp": "vin de pays",
    "blc": "blanc",
    "bib": "bag in box",
    "citr": "citron",
    "co": "coco",
    "gourm": "gourmand",
    "patis": "patisserie",
    "p'tits": "petit",
    "p'tit": "petit",
    "p tit": "petit",
    "pt": "pepite",
    "rev": "revil",
    "succ": "sucettes",
    "succet": "sucettes",
    "chocohouse": "choco house",
    "sach": "sachet",
    "tab": "tablette",
    "hte": "haute",
    "spagh": "spaghetti",
    "scht": "sachet",
    "nr": "noir",
    "caf": "cafe",
    "barr": "barre",
    "pces": "pieces",
    "pc": "pieces",
    "acidu": "acidule",
    "blnc": "blanc",
    "frui": "fruit",
    "gourman": "gourmand",
    "bte": "boîte",
    "bt": "boîte",
    "ptit": "petit",
    "corb": "corbeil",
    "ptits": "petit",
    "pti": "petit",
    "nois": "noisette",
    "poul": "poulain",
    "barq": "barquette",
    "barqu": "barquette",
    "fizz": "fizzy",
    "st": "saint",
    "mich": "michel",
    "cal": "calendrier",
    "calend": "calendrier",
    "calendr": "calendrier",
    "caram": "caramel",
    "cava": "cavalier",
    "har": "haribo",
    "choc": "chocolat",
    "choco": "chocolat",
    "lt": "lait",
    "choc'n": "chocolat noir",
    "choc n": "chocolat noir",
    "degust": "degustation",
    "degus": "degustation",
    "bis": "biscuit",
    "coffr": "coffret",
    "coff": "coffret",
    "conf": "confiserie",
    "confis": "confiserie",
    "croco": "crocodile",
    "dble": "double",
    "dess": "dessert",
    "doyp": "doypack",
    "harib": "harib",
    "et": "etui",
    "exc": "excellence",
    "excel": "excellence",
    "frit": "friture",
    "fritu": "friture",
    "fritur": "friture",
    "gd": "grand",
    "gr": "grand",
    "grd": "grand",
    "grchoc": "grand chocolat",
    "lat": "lait",
    "ass": "assorti",
    "assoti": "assorti",
    "noug": "nougatine",
    "nougat": "nougatine",
    "sct": "secret",
    "cho": "chocolat",
    "bisc": "biscuit",
    "am": "amande",
    "liq": "liqueur",
    "tabl": "tablette",
    "asst": "assorti",
    "bil": "bille",
    "vali": "valisette",
    "cda": "chevaliers d argouges",
    "tub": "tubo",
    "gril": "grille",
    "amandesgrilles": "amandes grilles",
    "ball": "ballotin",
    "piecestubo": "pieces tubo",
}

# Unit/measure tokens (plus "unknown") stripped during custom stop-word
# removal.
liste_stopword = [
    "oz",
    "kg",
    "g",
    "lb",
    "mg",
    "l",
    "cl",
    "ml",
    "tsp",
    "tbsp",
    "cm",
    "x",
    "cte",
    "h",
    "unknown",
]
app.py CHANGED
@@ -1,13 +1,12 @@
1
- import streamlit as st
2
  import requests
3
 
4
  # Configuration
5
  st.set_page_config(
6
  page_title="Recherche",
7
- page_icon="images/logo.png",
8
- layout="wide",
9
- initial_sidebar_state="auto"
10
-
11
  )
12
  change_footer_style = """
13
  <style>
@@ -27,7 +26,6 @@ def get_product_info(EAN):
27
  return {"error": "Product not found"}
28
 
29
 
30
-
31
  """ Bienvenue sur notre site de web scraping dédié à la recherche d’informations sur les produits disponibles sur Open Food Facts! 🎉
32
 
33
  Ici, vous pouvez rechercher des informations détaillées sur une multitude de produits simplement en utilisant leur code EAN. Nous nous efforçons de fournir des informations précises et à jour pour vous aider à prendre des décisions éclairées sur les produits que vous consommez.
@@ -35,7 +33,7 @@ Ici, vous pouvez rechercher des informations détaillées sur une multitude de p
35
  Profitez de votre exploration! 🕵️‍♀️
36
  """
37
  # Test de la fonction
38
- EAN =st.text_input("EAN", '0737628064502') # remplacer par l'EAN du produit
39
- if EAN :
40
  product_info = get_product_info(EAN)
41
- st.json(product_info)
 
1
+ import streamlit as st
2
  import requests
3
 
4
  # Configuration
5
  st.set_page_config(
6
  page_title="Recherche",
7
+ page_icon="images/logo.png",
8
+ layout="wide",
9
+ initial_sidebar_state="auto",
 
10
  )
11
  change_footer_style = """
12
  <style>
 
26
  return {"error": "Product not found"}
27
 
28
 
 
29
  """ Bienvenue sur notre site de web scraping dédié à la recherche d’informations sur les produits disponibles sur Open Food Facts! 🎉
30
 
31
  Ici, vous pouvez rechercher des informations détaillées sur une multitude de produits simplement en utilisant leur code EAN. Nous nous efforçons de fournir des informations précises et à jour pour vous aider à prendre des décisions éclairées sur les produits que vous consommez.
 
33
  Profitez de votre exploration! 🕵️‍♀️
34
  """
35
  # Test de la fonction
36
+ EAN = st.text_input("EAN", "0737628064502") # remplacer par l'EAN du produit
37
+ if EAN:
38
  product_info = get_product_info(EAN)
39
+ st.json(product_info)
pages/🤖_Gestion_de_rupture_famille.py CHANGED
@@ -1,170 +1,262 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import time
4
- from App.class_input_box.input_box import *
5
- from App.functions_rupture.functions_gestion import *
6
- from App.utils.divers_function import *
7
- from App.utils.filter_dataframe import *
8
- from App.utils.filter_dataframe import *
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
- # Page configuration
12
  def config_page():
13
  st.set_page_config(
14
  page_title="Gestion des ruptures",
15
  page_icon="images/Carrefour_logo.png",
16
- layout="wide"
17
  )
18
  hide_streamlit_style = """
19
- <style>
20
- footer {visibility: hidden;}
21
- </style>
22
- """
23
  st.markdown(hide_streamlit_style, unsafe_allow_html=True)
24
 
25
 
26
- def app():
27
- st.title("Gestion des ruptures famille ")
28
-
29
- input_box = InputsBox()
30
-
31
- data = input_box.get_data()
32
-
33
- try:
34
- if data.shape[0] != 0 :
35
- st.header("Data")
36
-
37
- st.dataframe(filter_dataframe(data))
38
-
39
- "## Parameters"
40
-
41
- col1, col2 = st.columns(2)
42
-
43
- with col1 :
44
- product_id = input_box.get_product_id()
45
-
46
- with col2 :
47
- class_id = input_box.get_class_id()
48
-
49
- '## Filters'
50
- col1, col2 = st.columns(2)
51
-
52
- with col1 :
53
- min_product_id = input_box.valid_produict_id()
54
-
55
- with col2 :
56
- vaind_class_id = input_box.valid_class_id()
57
-
58
- columns1, columns2, columns3 = st.columns(3)
59
-
60
- with columns1:
61
- nb_countries = input_box.get_number_countries()
62
-
63
- with columns2 :
64
- proportion = input_box.get_proportion()
65
-
66
- with columns3 :
67
- show_proportion = input_box.show_proportion()
68
-
69
- # excution
70
- if st.button("RUN ", key="run_button"):
71
- data = data_with_valide_key(data, product_id, class_id, min_product_id, vaind_class_id )
72
- Country, merged = nouvelle_data(data,
73
- str(product_id),
74
- str(class_id))
75
-
76
- data_with_pro = finale_merged(merged,
77
- Country,
78
- product_id,
79
- class_id)
80
-
81
- if show_proportion :
82
- display_data_with_download_button(data_with_pro, title="Show data with ratios")
83
-
84
-
85
- """## The data below is filtered as follows: """
86
- "- Number of countries greater than or equal to ", nb_countries
87
- "- The proportion with the highest ", class_id ," is greater than or equal to ",proportion
88
-
89
- data_countries_ratio = cond_pays_proportion(data_with_pro,
90
- nb_countries,
91
- proportion,
92
- product_id)
93
-
94
- if data_countries_ratio.empty :
95
- st.write("No result for the above criterion ")
96
-
97
- else :
98
- df = supprime_country(data_countries_ratio)
99
- max_number_index = df.groupby(product_id)['nombre'].idxmax()
100
- df_max_number = df.loc[max_number_index]
101
- df_max_number.drop(["Countries"], axis = 1, inplace =True)
102
-
103
- finale_df = Merger(data,
104
- df_max_number,
105
- product_id,
106
- class_id)
107
 
108
- tab1, tab2 = st.tabs(["Data without decision-making", "Data with proposed changes"])
109
 
110
- with tab1 :
111
- display_data_with_download_button(df, title="Data without decision-making")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
- with tab2 :
114
- display_data_with_download_button(finale_df, title="Data with proposed changes")
115
 
116
- "## Country priority "
117
 
118
- priority_data, df_equa, df_nequa = cond_pays_priorite(data_with_pro, product_id)
119
- tab1, tab2, tab3, tab4 = st.tabs(["Data without decision-making", "Equality case and more than 1", "Cases of inequality", "Data with proposed changes more than 2"])
 
 
 
120
 
121
- with tab1 :
122
- display_data_with_download_button(priority_data, title="Data without decision-making")
 
123
 
124
- with tab2 :
125
- display_data_with_download_button(df_equa, title="Equality case")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
- with tab3 :
128
- df_nequa_ = df_nequa[(df_nequa.total_by_line.apply(lambda x: int(x) > 2))]
129
- display_data_with_download_button(df_nequa_, title="Cases of inequality")
130
-
131
- max_poids_index = df_nequa_.groupby(product_id)['Poids'].idxmax()
 
 
 
132
 
133
-
134
- df_max_poids = df_nequa_.loc[max_poids_index]
135
- df_max_poids.drop(["COUNTRY_KEY"], axis = 1, inplace= True)
136
- finale_df_ = Merger(data,df_max_poids, product_id, class_id)
137
- with tab4 :
138
- display_data_with_download_button(finale_df_, title="Data with proposed changes more than 2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
 
141
- # one vs one
142
- df_nequa_1 = df_nequa[(df_nequa.total_by_line.apply(lambda x: int(x) == 2))]
143
- max_poids_index1 = df_nequa_1.groupby(product_id)['Poids'].idxmax()
144
- df_max_poids1 = df_nequa_1.loc[max_poids_index1]
145
- df_max_poids1.drop(["COUNTRY_KEY"], axis = 1, inplace= True)
 
 
 
 
 
 
 
 
 
146
 
147
- finale_df_1 = ajout_simularite(Merger(data,df_max_poids1, product_id, class_id))
148
- display_data_with_download_button(finale_df_1, title=" One vs One with similarity score")
149
- st.success('Done!', icon="✅")
150
- st.balloons()
151
 
152
- except:
153
- pass
154
- #st.error('This is an error', icon="🚨")
155
- st.info('Ensure that column names are capitalized and that product_id and class_id descriptions are present, as well as a country column.', icon="ℹ️")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
 
158
  if __name__ == "__main__":
159
- lien_label = "Example of input"
160
- lien_url = "https://docs.google.com/spreadsheets/d/123hVTOFpBT-C6mCnrOBh8fFIhSi8FxiuyHZJAQu8bDc/edit#gid=1220891905"
161
- lien_html = f'<a href="{lien_url}">{lien_label}</a>'
162
-
163
- lien_label_ = "Documentation utilisateur"
164
- lien_url_ = "https://docs.google.com/document/d/1WQwr5D87ZHSlBRWQw7KMbBhbEdFS4dlhltFDgZBNP4U/edit?usp=sharing"
165
- lien_html_ = f'<a href="{lien_url_}">{lien_label_}</a>'
166
-
167
  config_page()
168
- st.sidebar.markdown(lien_html_, unsafe_allow_html=True)
169
- st.sidebar.markdown(lien_html, unsafe_allow_html=True)
 
 
 
 
 
 
170
  app()
 
1
+ import streamlit as st
2
+ from App.class_input_box.input_box import InputsBox
3
+ from App.functions_rupture.functions_gestion import (
4
+ filter_data_with_valid_keys,
5
+ process_new_data,
6
+ finalize_merged_data,
7
+ filter_by_country_and_proportion,
8
+ process_country_priority,
9
+ )
10
+ from App.utils.divers_function import (
11
+ display_data_with_download_button,
12
+ supprime_country,
13
+ merge_and_update_classification,
14
+ add_text_similarity
15
+ )
16
+ from App.utils.filter_dataframe import filter_dataframe
17
+ import logging
18
+ logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
19
 
20
 
 
21
def config_page():
    """Configure the Streamlit page and hide the default footer."""
    page_settings = {
        "page_title": "Gestion des ruptures",
        "page_icon": "images/Carrefour_logo.png",
        "layout": "wide",
    }
    st.set_page_config(**page_settings)
    footer_css = """
    <style>
    footer {visibility: hidden;}
    </style>
    """
    st.markdown(footer_css, unsafe_allow_html=True)
33
 
34
 
35
def display_filters(input_box):
    """Render every parameter widget and return the selected values.

    Args:
        input_box: InputsBox instance providing the widget helpers.

    Returns:
        tuple: (product_id, class_id, min_product_id, valid_class_id,
        nb_countries, proportion, show_proportion).
    """
    left, right = st.columns(2)
    with left:
        product_id = input_box.get_product_id()
        min_product_id = input_box.valid_produict_id()
    with right:
        class_id = input_box.get_class_id()
        valid_class_id = input_box.valid_class_id()

    first, second, third = st.columns(3)
    with first:
        nb_countries = input_box.get_number_countries()
    with second:
        proportion = input_box.get_proportion()
    with third:
        show_proportion = input_box.show_proportion()

    return (
        product_id,
        class_id,
        min_product_id,
        valid_class_id,
        nb_countries,
        proportion,
        show_proportion,
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
 
62
 
63
def process_data(
    data,
    product_id,
    class_id,
    min_product_id,
    valid_class_id,
    nb_countries,
    proportion,
    show_proportion,
):
    """Run the full rupture-management pipeline and render the results.

    Args:
        data: Raw input DataFrame.
        product_id: Name of the product-identifier column.
        class_id: Name of the classification column.
        min_product_id: Minimum accepted length of a product id.
        valid_class_id: Class-id prefixes considered invalid.
        nb_countries: Minimum number of countries required.
        proportion: Minimum ratio for the dominant classification.
        show_proportion: Whether to display the intermediate ratio table.
    """
    # Lazy %-style args instead of f-strings so formatting is skipped when
    # DEBUG logging is disabled.
    logging.debug(
        "Starting process_data with product_id: %s, class_id: %s",
        product_id,
        class_id,
    )
    data = filter_data_with_valid_keys(
        data, product_id, class_id, min_product_id, valid_class_id
    )
    logging.debug("Data filtered with valid keys")
    Country, merged = process_new_data(data, str(product_id), class_id)
    logging.debug("New data processed")
    data_with_pro = finalize_merged_data(merged, Country, product_id, class_id)
    logging.debug("Merged data finalized")

    if show_proportion:
        # Replaced leftover placeholder debug logs ("ibra 1"/"ibra 2").
        logging.debug("Displaying ratio table")
        display_data_with_download_button(data_with_pro, title="Show data with ratios")

    st.write("## The data below is filtered as follows:")
    st.write(f"- Number of countries greater than or equal to {nb_countries}")
    st.write(
        f"- The proportion with the highest {class_id} is greater than or equal to {proportion}"
    )
    data_countries_ratio = filter_by_country_and_proportion(
        data_with_pro, nb_countries, proportion, product_id
    )
    if data_countries_ratio.empty:
        st.write("No result for the above criterion")
    else:
        display_filtered_data(data, data_countries_ratio, product_id, class_id)

    display_country_priority(data_with_pro, data, product_id, class_id)
 
103
 
 
104
 
105
def display_filtered_data(data, data_countries_ratio, product_id, class_id):
    """Show the filtered rows and the proposed reclassification in two tabs.

    Args:
        data: Original full DataFrame.
        data_countries_ratio: Rows passing the country/proportion filter.
        product_id: Name of the product-identifier column.
        class_id: Name of the classification column.
    """
    df = supprime_country(data_countries_ratio)
    # Keep, per product, the row with the highest occurrence count.
    max_number_index = df.groupby(product_id)["count"].idxmax()
    # Chained .drop on the selection returns a fresh frame instead of an
    # inplace drop on a .loc slice (which risks SettingWithCopyWarning).
    df_max_number = df.loc[max_number_index].drop(columns=["Countries"])

    finale_df = merge_and_update_classification(
        data, df_max_number, product_id, class_id
    )

    tab1, tab2 = st.tabs(
        ["Data without decision-making", "Data with proposed changes"]
    )
    with tab1:
        display_data_with_download_button(
            df,
            title="Data without decision-making"
        )
    with tab2:
        display_data_with_download_button(
            finale_df,
            title="Data with proposed changes"
        )
129
+
130
+ def display_country_priority(data_with_pro, data, product_id, class_id):
131
+ st.write("## Country priority")
132
+ priority_data, df_equa, df_nequa = process_country_priority(
133
+ data_with_pro, product_id
134
+ )
135
 
136
+ tab1, tab2, tab3, tab4 = st.tabs(
137
+ [
138
+ "Data without decision-making",
139
+ "Equality case and more than 1",
140
+ "Cases of inequality",
141
+ "Data with proposed changes more than 2",
142
+ ]
143
+ )
144
 
145
+ with tab1:
146
+ display_data_with_download_button(
147
+ priority_data, title="Data without decision-making"
148
+ )
149
+ with tab2:
150
+ display_data_with_download_button(df_equa, title="Equality case")
151
+ with tab3:
152
+ df_nequa_ = df_nequa[df_nequa.total_by_product.apply(
153
+ lambda x: int(x) > 2
154
+ )
155
+ ]
156
+ display_data_with_download_button(
157
+ df_nequa_,
158
+ title="Cases of inequality"
159
+ )
160
+ with tab4:
161
+ display_proposed_changes(df_nequa_, data, product_id, class_id)
162
+
163
+ display_one_vs_one(df_nequa, data, product_id, class_id)
164
+
165
+
166
+ def display_proposed_changes(df_nequa_, data, product_id, class_id):
167
+ max_poids_index = df_nequa_.groupby(product_id)["Weight"].idxmax()
168
+ df_max_poids = df_nequa_.loc[max_poids_index]
169
+ df_max_poids.drop(["COUNTRY_KEY"], axis=1, inplace=True)
170
+ finale_df_ = merge_and_update_classification(
171
+ data, df_max_poids, product_id, class_id
172
+ )
173
+ display_data_with_download_button(
174
+ finale_df_, title="Data with proposed changes more than 2"
175
+ )
176
 
177
 
178
+ def display_one_vs_one(df_nequa, data, product_id, class_id):
179
+ df_nequa_1 = df_nequa[df_nequa.total_by_product.apply(lambda x: int(x) == 2)]
180
+ max_poids_index1 = df_nequa_1.groupby(product_id)["Weight"].idxmax()
181
+ df_max_poids1 = df_nequa_1.loc[max_poids_index1]
182
+ df_max_poids1.drop(["COUNTRY_KEY"], axis=1, inplace=True)
183
+ finale_df_1 = add_text_similarity(
184
+ merge_and_update_classification(
185
+ data,
186
+ df_max_poids1,
187
+ product_id, class_id)
188
+ )
189
+ display_data_with_download_button(
190
+ finale_df_1, title=" One vs One with similarity score"
191
+ )
192
 
 
 
 
 
193
 
194
+ def app():
195
+ logging.info("Starting app function")
196
+ st.title("Gestion des ruptures famille")
197
+ input_box = InputsBox()
198
+ data = input_box.get_data()
199
+ logging.debug(f"Data retrieved: {data is not None}")
200
+
201
+ if data is not None and data.shape[0] != 0:
202
+ logging.info("Data is valid, proceeding with processing")
203
+ st.header("Data")
204
+ st.dataframe(filter_dataframe(data))
205
+
206
+ st.header("Parameters")
207
+ (
208
+ product_id,
209
+ class_id,
210
+ min_product_id,
211
+ valid_class_id,
212
+ nb_countries,
213
+ proportion,
214
+ show_proportion,
215
+ ) = display_filters(input_box)
216
+ list_product_selected = filter_dataframe(
217
+ data,
218
+ 'data_filter_by_holding'
219
+ )[product_id].unique().tolist()
220
+ if list_product_selected is not None and len(list_product_selected) > 0:
221
+ data_selected = data[data[product_id].isin(list_product_selected)]
222
+ else:
223
+ st.warning("No addictionnal filter selecting")
224
+ data_selected = data.copy()
225
+ if st.button("RUN", key="run_button"):
226
+ try:
227
+ process_data(
228
+ data_selected,
229
+ product_id,
230
+ class_id,
231
+ min_product_id,
232
+ valid_class_id,
233
+ nb_countries,
234
+ proportion,
235
+ show_proportion,
236
+ )
237
+ st.success("Done!", icon="✅")
238
+ st.balloons()
239
+ except Exception as e:
240
+ st.error(f"An error occurred: {str(e)}", icon="🚨")
241
+ else:
242
+ logging.warning("Data is None or empty")
243
+ st.info(
244
+ """Ensure that column names are capitalized and that product_id
245
+ and class_id descriptions are present, as well as a country
246
+ column.""",
247
+ icon="ℹ️",
248
+ )
249
+ logging.info("App function completed")
250
 
251
 
252
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
253
  config_page()
254
+ st.sidebar.markdown(
255
+ '<a href="https://docs.google.com/document/d/1WQwr5D87ZHSlBRWQw7KMbBhbEdFS4dlhltFDgZBNP4U/edit?usp=sharing">Documentation utilisateur</a>',
256
+ unsafe_allow_html=True,
257
+ )
258
+ st.sidebar.markdown(
259
+ '<a href="https://docs.google.com/spreadsheets/d/123hVTOFpBT-C6mCnrOBh8fFIhSi8FxiuyHZJAQu8bDc/edit#gid=1220891905">Example of input</a>',
260
+ unsafe_allow_html=True,
261
+ )
262
  app()
pages/🦾_Gestion_de_rupture_sous_famille.py CHANGED
@@ -1,172 +1,261 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import time
4
- from App.class_input_box.input_box import *
5
- from App.functions_rupture.functions_gestion import *
6
- from App.utils.divers_function import *
7
- from App.utils.filter_dataframe import *
8
- from App.utils.filter_dataframe import *
9
-
10
-
11
- # Page configuration
12
- st.set_page_config(
13
- page_title="Gestion des ruptures",
14
- page_icon="images/Carrefour_logo.png",
15
- layout="wide"
16
  )
17
- hide_streamlit_style = """
18
- <style>
19
- footer {visibility: hidden;}
20
- </style>
21
- """
22
- st.markdown(hide_streamlit_style, unsafe_allow_html=True)
23
-
24
-
25
- def app():
26
- st.title("Gestion des ruptures sous famille")
27
-
28
- input_box = InputsBox()
29
-
30
- data = input_box.get_data()
31
-
32
- try:
33
- if data.shape[0] != 0 :
34
- st.header("Data")
35
-
36
- st.dataframe(filter_dataframe(data))
37
-
38
- "## Parameters"
39
-
40
- col1, col2 = st.columns(2)
41
-
42
- with col1 :
43
- product_id = input_box.get_product_id()
44
-
45
- with col2 :
46
- class_id = input_box.get_class_id()
47
-
48
- '## Filters'
49
- col1, col2 = st.columns(2)
50
-
51
- with col1 :
52
- min_product_id = input_box.valid_produict_id()
53
-
54
- with col2 :
55
- vaind_class_id = input_box.valid_class_id()
56
-
57
- columns1, columns2, columns3, columns4 = st.columns(4)
58
-
59
- with columns1:
60
- nb_countries = input_box.get_number_countries()
61
-
62
- with columns2 :
63
- proportion = input_box.get_proportion()
64
-
65
- with columns3 :
66
- countries = input_box.get_countries()
67
-
68
- with columns4 :
69
- show_proportion = input_box.show_proportion()
70
-
71
- #execution
72
- if st.button("RUN ", key="run_button"):
73
- data = data_with_valide_key(data, product_id, class_id, min_product_id, vaind_class_id )
74
- data = data[data.COUNTRY_KEY.isin(countries)]
75
- Country, merged = nouvelle_data(data,
76
- str(product_id),
77
- str(class_id))
78
-
79
- data_with_pro = finale_merged(merged,
80
- Country,
81
- product_id,
82
- class_id)
83
-
84
- if show_proportion :
85
- display_data_with_download_button(data_with_pro, title="Show data with ratios")
86
-
87
-
88
- """## The data below is filtered as follows: """
89
- "- Number of countries greater than or equal to ", nb_countries
90
- "- The proportion with the highest ", class_id ," is greater than or equal to ",proportion
91
-
92
- data_countries_ratio = cond_pays_proportion(data_with_pro,
93
- nb_countries,
94
- proportion,
95
- product_id)
96
-
97
- if data_countries_ratio.empty :
98
- st.write("No result for the above criterion ")
99
-
100
- else :
101
- df = supprime_country(data_countries_ratio)
102
- max_number_index = df.groupby(product_id)['nombre'].idxmax()
103
- df_max_number = df.loc[max_number_index]
104
- df_max_number.drop(["Countries"], axis = 1, inplace =True)
105
-
106
- finale_df = Merger(data,
107
- df_max_number,
108
- product_id,
109
- class_id)
110
-
111
- tab1, tab2 = st.tabs(["Data without decision-making", "Data with proposed changes"])
112
-
113
- with tab1 :
114
- display_data_with_download_button(df, title="Data without decision-making")
115
-
116
- with tab2 :
117
- display_data_with_download_button(finale_df, title="Data with proposed changes")
118
-
119
- "## Country priority "
120
-
121
- priority_data, df_equa, df_nequa = cond_pays_priorite(data_with_pro, product_id)
122
- tab1, tab2, tab3, tab4 = st.tabs(["Data without decision-making", "Equality case and mt1", "Cases of inequality", "Data with proposed changes mt2"])
123
-
124
- with tab1 :
125
- display_data_with_download_button(priority_data, title="Data without decision-making")
126
-
127
- with tab2 :
128
- display_data_with_download_button(df_equa, title="Equality case")
129
-
130
- with tab3 :
131
- df_nequa_ = df_nequa[(df_nequa.total_by_line.apply(lambda x: int(x) > 2))]
132
- display_data_with_download_button(df_nequa_, title="Cases of inequality")
133
-
134
- max_poids_index = df_nequa_.groupby(product_id)['Poids'].idxmax()
135
 
136
-
137
- df_max_poids = df_nequa_.loc[max_poids_index]
138
- df_max_poids.drop(["COUNTRY_KEY"], axis = 1, inplace= True)
139
- finale_df_ = Merger(data,df_max_poids, product_id, class_id)
140
- with tab4 :
141
- display_data_with_download_button(finale_df_, title="Data with proposed changes mt2")
142
 
143
 
144
- # one vs one
145
- df_nequa_1 = df_nequa[(df_nequa.total_by_line.apply(lambda x: int(x) == 2))]
146
- max_poids_index1 = df_nequa_1.groupby(product_id)['Poids'].idxmax()
147
- df_max_poids1 = df_nequa_1.loc[max_poids_index1]
148
- df_max_poids1.drop(["COUNTRY_KEY"], axis = 1, inplace= True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
- finale_df_1 = ajout_simularite(Merger(data,df_max_poids1, product_id, class_id))
151
- display_data_with_download_button(finale_df_1, title=" One vs One with similarity score")
152
- st.success('Done!', icon="✅")
153
- st.balloons()
154
 
155
- except:
156
- pass
157
- #st.error('This is an error', icon="🚨")
158
- st.info('Ensure that column names are capitalized and that product_id and class_id descriptions are present, as well as a country column.', icon="ℹ️")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
 
161
  if __name__ == "__main__":
162
- lien_label = "# Example of input"
163
- lien_url = "https://docs.google.com/spreadsheets/d/123hVTOFpBT-C6mCnrOBh8fFIhSi8FxiuyHZJAQu8bDc/edit#gid=1220891905"
164
- lien_html = f'<a href="{lien_url}">{lien_label}</a>'
165
-
166
- lien_label_ = "Documentation utilisateur"
167
- lien_url_ = "https://docs.google.com/document/d/1WQwr5D87ZHSlBRWQw7KMbBhbEdFS4dlhltFDgZBNP4U/edit?usp=sharing"
168
- lien_html_ = f'<a href="{lien_url_}">{lien_label_}</a>'
169
-
170
- st.sidebar.markdown(lien_html_, unsafe_allow_html=True)
171
- st.sidebar.markdown(lien_html, unsafe_allow_html=True)
172
  app()
 
1
+ import streamlit as st
2
+ from App.class_input_box.input_box import InputsBox
3
+ from App.functions_rupture.functions_gestion import (
4
+ filter_data_with_valid_keys,
5
+ process_new_data,
6
+ finalize_merged_data,
7
+ filter_by_country_and_proportion,
8
+ process_country_priority,
 
 
 
 
 
 
 
9
  )
10
+ from App.utils.divers_function import (
11
+ display_data_with_download_button,
12
+ supprime_country,
13
+ merge_and_update_classification,
14
+ add_text_similarity,
15
+ )
16
+ from App.utils.filter_dataframe import filter_dataframe
17
+ import logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ logging.basicConfig(
20
+ level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
21
+ )
 
 
 
22
 
23
 
24
+ def config_page():
25
+ st.set_page_config(
26
+ page_title="Gestion des ruptures",
27
+ page_icon="images/Carrefour_logo.png",
28
+ layout="wide",
29
+ )
30
+ hide_streamlit_style = """
31
+ <style>
32
+ footer {visibility: hidden;}
33
+ </style>
34
+ """
35
+ st.markdown(hide_streamlit_style, unsafe_allow_html=True)
36
+
37
+
38
+ def display_filters(input_box):
39
+ col1, col2 = st.columns(2)
40
+ with col1:
41
+ product_id = input_box.get_product_id()
42
+ min_product_id = input_box.valid_produict_id()
43
+ with col2:
44
+ class_id = input_box.get_class_id()
45
+ valid_class_id = input_box.valid_class_id()
46
+
47
+ col1, col2, col3 = st.columns(3)
48
+ with col1:
49
+ nb_countries = input_box.get_number_countries()
50
+ with col2:
51
+ proportion = input_box.get_proportion()
52
+ with col3:
53
+ show_proportion = input_box.show_proportion()
54
+
55
+ countries = input_box.get_countries()
56
+
57
+ return (
58
+ product_id,
59
+ class_id,
60
+ min_product_id,
61
+ valid_class_id,
62
+ nb_countries,
63
+ proportion,
64
+ countries,
65
+ show_proportion,
66
+ )
67
+
68
+
69
+ def process_data(
70
+ data,
71
+ product_id,
72
+ class_id,
73
+ min_product_id,
74
+ valid_class_id,
75
+ nb_countries,
76
+ proportion,
77
+ countries,
78
+ show_proportion,
79
+ ):
80
+ logging.debug(
81
+ f"Starting process_data with product_id: {product_id}, class_id: {class_id}"
82
+ )
83
+ data = filter_data_with_valid_keys(
84
+ data, product_id, class_id, min_product_id, valid_class_id
85
+ )
86
+ data = data[data.COUNTRY_KEY.isin(countries)]
87
+ logging.debug("Data filtered with valid keys and countries")
88
+ Country, merged = process_new_data(data, str(product_id), str(class_id))
89
+ logging.debug("New data processed")
90
+ data_with_pro = finalize_merged_data(merged, Country, product_id, class_id)
91
+ logging.debug("Merged data finalized")
92
+
93
+ if show_proportion:
94
+ logging.info("Displaying data with ratios")
95
+ display_data_with_download_button(data_with_pro, title="Show data with ratios")
96
+
97
+ st.write("## The data below is filtered as follows:")
98
+ st.write(f"- Number of countries greater than or equal to {nb_countries}")
99
+ st.write(
100
+ f"- The proportion with the highest {class_id} is greater than or equal to {proportion}"
101
+ )
102
+
103
+ data_countries_ratio = filter_by_country_and_proportion(
104
+ data_with_pro, nb_countries, proportion, product_id
105
+ )
106
+
107
+ if data_countries_ratio.empty:
108
+ st.write("No result for the above criterion")
109
+ else:
110
+ display_filtered_data(data, data_countries_ratio, product_id, class_id)
111
+
112
+ display_country_priority(data_with_pro, data, product_id, class_id)
113
+
114
+
115
+ def display_filtered_data(data, data_countries_ratio, product_id, class_id):
116
+ df = supprime_country(data_countries_ratio)
117
+ max_number_index = df.groupby(product_id)["count"].idxmax()
118
+ df_max_number = df.loc[max_number_index]
119
+ df_max_number.drop(["Countries"], axis=1, inplace=True)
120
+
121
+ finale_df = merge_and_update_classification(
122
+ data, df_max_number, product_id, class_id
123
+ )
124
+
125
+ tab1, tab2 = st.tabs(["Data without decision-making", "Data with proposed changes"])
126
+ with tab1:
127
+ display_data_with_download_button(df, title="Data without decision-making")
128
+ with tab2:
129
+ display_data_with_download_button(finale_df, title="Data with proposed changes")
130
+
131
+
132
+ def display_country_priority(data_with_pro, data, product_id, class_id):
133
+ st.write("## Country priority")
134
+ priority_data, df_equa, df_nequa = process_country_priority(
135
+ data_with_pro, product_id
136
+ )
137
+
138
+ tab1, tab2, tab3, tab4 = st.tabs(
139
+ [
140
+ "Data without decision-making",
141
+ "Equality case and more than 1",
142
+ "Cases of inequality",
143
+ "Data with proposed changes more than 2",
144
+ ]
145
+ )
146
+
147
+ with tab1:
148
+ display_data_with_download_button(
149
+ priority_data, title="Data without decision-making"
150
+ )
151
+ with tab2:
152
+ display_data_with_download_button(df_equa, title="Equality case")
153
+ with tab3:
154
+ df_nequa_ = df_nequa[df_nequa.total_by_product.apply(lambda x: int(x) > 2)]
155
+ display_data_with_download_button(df_nequa_, title="Cases of inequality")
156
+ with tab4:
157
+ display_proposed_changes(df_nequa_, data, product_id, class_id)
158
+
159
+ display_one_vs_one(df_nequa, data, product_id, class_id)
160
+
161
+
162
+ def display_proposed_changes(df_nequa_, data, product_id, class_id):
163
+ max_poids_index = df_nequa_.groupby(product_id)["Weight"].idxmax()
164
+ df_max_poids = df_nequa_.loc[max_poids_index]
165
+ df_max_poids.drop(["COUNTRY_KEY"], axis=1, inplace=True)
166
+ finale_df_ = merge_and_update_classification(
167
+ data, df_max_poids, product_id, class_id
168
+ )
169
+ display_data_with_download_button(
170
+ finale_df_, title="Data with proposed changes more than 2"
171
+ )
172
+
173
+
174
+ def display_one_vs_one(df_nequa, data, product_id, class_id):
175
+ df_nequa_1 = df_nequa[df_nequa.total_by_product.apply(lambda x: int(x) == 2)]
176
+ max_poids_index1 = df_nequa_1.groupby(product_id)["Weight"].idxmax()
177
+ df_max_poids1 = df_nequa_1.loc[max_poids_index1]
178
+ df_max_poids1.drop(["COUNTRY_KEY"], axis=1, inplace=True)
179
+
180
+ finale_df_1 = add_text_similarity(
181
+ merge_and_update_classification(data, df_max_poids1, product_id, class_id)
182
+ )
183
+ display_data_with_download_button(
184
+ finale_df_1, title=" One vs One with similarity score"
185
+ )
186
 
 
 
 
 
187
 
188
+ def app():
189
+ logging.info("Starting app function")
190
+ st.title("Gestion des ruptures sous famille")
191
+ input_box = InputsBox()
192
+ data = input_box.get_data()
193
+ logging.debug(f"Data retrieved: {data is not None}")
194
+
195
+ if data is not None and data.shape[0] != 0:
196
+ logging.info("Data is valid, proceeding with processing")
197
+ st.header("Data")
198
+ st.dataframe(filter_dataframe(data))
199
+
200
+ st.header("Parameters")
201
+ (
202
+ product_id,
203
+ class_id,
204
+ min_product_id,
205
+ valid_class_id,
206
+ nb_countries,
207
+ proportion,
208
+ countries,
209
+ show_proportion,
210
+ ) = display_filters(input_box)
211
+
212
+ list_product_selected = (
213
+ filter_dataframe(data, "data_filter_by_holding")[product_id]
214
+ .unique()
215
+ .tolist()
216
+ )
217
+ if list_product_selected is not None and len(list_product_selected) > 0:
218
+ data_selected = data[data[product_id].isin(list_product_selected)]
219
+ else:
220
+ st.warning("No additional filter selected")
221
+ data_selected = data.copy()
222
+
223
+ if st.button("RUN", key="run_button"):
224
+ try:
225
+ process_data(
226
+ data_selected,
227
+ product_id,
228
+ class_id,
229
+ min_product_id,
230
+ valid_class_id,
231
+ nb_countries,
232
+ proportion,
233
+ countries,
234
+ show_proportion,
235
+ )
236
+ st.success("Done!", icon="✅")
237
+ st.balloons()
238
+ except Exception as e:
239
+ st.error(f"An error occurred: {str(e)}", icon="🚨")
240
+ else:
241
+ logging.warning("Data is None or empty")
242
+ st.info(
243
+ """Ensure that column names are capitalized and that product_id
244
+ and class_id descriptions are present, as well as a country
245
+ column.""",
246
+ icon="ℹ️",
247
+ )
248
+ logging.info("App function completed")
249
 
250
 
251
  if __name__ == "__main__":
252
+ config_page()
253
+ st.sidebar.markdown(
254
+ '<a href="https://docs.google.com/document/d/1WQwr5D87ZHSlBRWQw7KMbBhbEdFS4dlhltFDgZBNP4U/edit?usp=sharing">Documentation utilisateur</a>',
255
+ unsafe_allow_html=True,
256
+ )
257
+ st.sidebar.markdown(
258
+ '<a href="https://docs.google.com/spreadsheets/d/123hVTOFpBT-C6mCnrOBh8fFIhSi8FxiuyHZJAQu8bDc/edit#gid=1220891905">Example of input</a>',
259
+ unsafe_allow_html=True,
260
+ )
 
261
  app()