Spaces:
Sleeping
Sleeping
COULIBALY Bourahima
commited on
Commit
·
2c49a88
1
Parent(s):
38b4487
update
Browse files- .vscode/settings.json +3 -0
- App/class_input_box/input_box.py +60 -28
- App/functions_rupture/functions_gestion.py +355 -147
- App/utils/divers_function.py +175 -77
- App/utils/filter_dataframe.py +19 -7
- App/utils/priorite_pays.py +51 -7
- App/utils/standadisation.py +124 -16
- app.py +7 -9
- pages/🤖_Gestion_de_rupture_famille.py +233 -141
- pages/🦾_Gestion_de_rupture_sous_famille.py +251 -162
.vscode/settings.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"DockerRun.DisableAutoGenerateConfig": true
|
| 3 |
+
}
|
App/class_input_box/input_box.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
import streamlit as st
|
| 3 |
-
from typing import Dict
|
| 4 |
|
| 5 |
|
| 6 |
class InputsBox:
|
|
@@ -9,49 +8,82 @@ class InputsBox:
|
|
| 9 |
self.columns = None
|
| 10 |
self.product_id = None
|
| 11 |
self.class_id = None
|
| 12 |
-
|
| 13 |
-
def get_data(self)
|
| 14 |
-
uploaded_file = st.file_uploader(
|
| 15 |
-
|
|
|
|
|
|
|
| 16 |
if uploaded_file is not None:
|
| 17 |
-
#try :
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
-
# self.data = pd.read_csv(uploaded_file,dtype=str, sep=";", encoding="utf-8" )
|
| 22 |
-
|
| 23 |
-
self.columns = self.data.columns.tolist()
|
| 24 |
return self.data
|
| 25 |
|
| 26 |
def valid_produict_id(self) -> int:
|
| 27 |
-
min_len = st.number_input(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
return min_len
|
| 29 |
-
|
| 30 |
def valid_class_id(self) -> str:
|
| 31 |
-
valid = st.text_input(
|
|
|
|
|
|
|
| 32 |
return valid.split(";")
|
| 33 |
|
| 34 |
def get_product_id(self) -> str:
|
| 35 |
-
self.product_id = st.selectbox(
|
|
|
|
|
|
|
| 36 |
return self.product_id
|
| 37 |
|
| 38 |
-
|
| 39 |
def get_class_id(self) -> str:
|
| 40 |
-
self.class_id = st.selectbox(
|
|
|
|
|
|
|
| 41 |
return self.class_id
|
| 42 |
-
|
| 43 |
def get_countries(self) -> list:
|
| 44 |
-
countries = st.multiselect(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
return countries
|
| 46 |
-
|
| 47 |
-
def get_number_countries(self) -> int
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def get_proportion(self) -> float:
|
| 52 |
-
proportion = st.number_input(
|
|
|
|
|
|
|
|
|
|
| 53 |
return proportion
|
| 54 |
-
|
| 55 |
def show_proportion(self) -> bool:
|
| 56 |
-
show_condition = st.checkbox(
|
| 57 |
-
|
|
|
|
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import streamlit as st
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
class InputsBox:
|
|
|
|
| 8 |
self.columns = None
|
| 9 |
self.product_id = None
|
| 10 |
self.class_id = None
|
| 11 |
+
|
| 12 |
+
def get_data(self):
|
| 13 |
+
uploaded_file = st.file_uploader(
|
| 14 |
+
"Choose a CSV file with the separator ';' ", type=["csv"]
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
if uploaded_file is not None:
|
| 18 |
+
# try :
|
| 19 |
+
self.data = pd.read_csv(
|
| 20 |
+
uploaded_file, dtype=str, sep=";", encoding="latin-1"
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
# except :
|
| 24 |
+
# self.data = pd.read_csv(
|
| 25 |
+
# uploaded_file,dtype=str,
|
| 26 |
+
# sep=";",
|
| 27 |
+
# encoding="utf-8" )
|
| 28 |
|
| 29 |
+
self.columns = self.data.columns.tolist()
|
|
|
|
|
|
|
|
|
|
| 30 |
return self.data
|
| 31 |
|
| 32 |
def valid_produict_id(self) -> int:
|
| 33 |
+
min_len = st.number_input(
|
| 34 |
+
"Minimum len of product_id",
|
| 35 |
+
max_value=25,
|
| 36 |
+
min_value=1,
|
| 37 |
+
value=2,
|
| 38 |
+
key="pp"
|
| 39 |
+
)
|
| 40 |
return min_len
|
| 41 |
+
|
| 42 |
def valid_class_id(self) -> str:
|
| 43 |
+
valid = st.text_input(
|
| 44 |
+
"First element of No valid class_id separed by ;"
|
| 45 |
+
)
|
| 46 |
return valid.split(";")
|
| 47 |
|
| 48 |
def get_product_id(self) -> str:
|
| 49 |
+
self.product_id = st.selectbox(
|
| 50 |
+
"product_id (BARCODE)", options=self.columns, key="product_id"
|
| 51 |
+
)
|
| 52 |
return self.product_id
|
| 53 |
|
|
|
|
| 54 |
def get_class_id(self) -> str:
|
| 55 |
+
self.class_id = st.selectbox(
|
| 56 |
+
"class_id (WW_CLASS_KEY)", options=self.columns, key="class_id"
|
| 57 |
+
)
|
| 58 |
return self.class_id
|
| 59 |
+
|
| 60 |
def get_countries(self) -> list:
|
| 61 |
+
countries = st.multiselect(
|
| 62 |
+
"Select countries : ",
|
| 63 |
+
tuple(self.data.COUNTRY_KEY.unique()),
|
| 64 |
+
key="countries",
|
| 65 |
+
)
|
| 66 |
return countries
|
| 67 |
+
|
| 68 |
+
def get_number_countries(self) -> int:
|
| 69 |
+
nb_countries = st.number_input(
|
| 70 |
+
"Number of countries",
|
| 71 |
+
min_value=1,
|
| 72 |
+
max_value=20,
|
| 73 |
+
value=1,
|
| 74 |
+
key="Number of countries",
|
| 75 |
+
)
|
| 76 |
+
return nb_countries
|
| 77 |
+
|
| 78 |
def get_proportion(self) -> float:
|
| 79 |
+
proportion = st.number_input(
|
| 80 |
+
"Proportion",
|
| 81 |
+
min_value=0.10, max_value=1.00, value=0.75, key="proportion"
|
| 82 |
+
)
|
| 83 |
return proportion
|
| 84 |
+
|
| 85 |
def show_proportion(self) -> bool:
|
| 86 |
+
show_condition = st.checkbox(
|
| 87 |
+
"Show data with ratios ", value=True, key="show_ratio_checkbox"
|
| 88 |
+
)
|
| 89 |
+
return show_condition
|
App/functions_rupture/functions_gestion.py
CHANGED
|
@@ -1,174 +1,382 @@
|
|
| 1 |
import numpy as np
|
| 2 |
import pandas as pd
|
| 3 |
-
import streamlit as st
|
| 4 |
-
from App.utils.priorite_pays import
|
|
|
|
| 5 |
import nltk
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
pass
|
| 16 |
-
return
|
| 17 |
-
|
| 18 |
|
| 19 |
-
def calcul_total_par_ligne(data, produit_id, class_id):
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
@st.cache_data
|
| 35 |
-
def
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
non_zero = stacked[stacked != 0]
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
| 42 |
return sparse_matrix
|
| 43 |
|
| 44 |
|
| 45 |
@st.cache_data
|
| 46 |
-
def
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
return Country[produit_id, class_id]
|
| 68 |
|
| 69 |
|
| 70 |
-
def
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
try:
|
| 73 |
-
merged["Countries"] = merged.apply(
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
except Exception as e:
|
| 77 |
-
st.warning(f"
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
def
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
-
|
| 99 |
-
df_equa = df[duplicated_subclass]
|
| 100 |
-
df_equa = df_equa[(df_equa.Proportion == 0.5)]
|
| 101 |
-
|
| 102 |
-
df_nequa = df[~df.isin(df_equa)].dropna()
|
| 103 |
-
|
| 104 |
-
return df, df_equa, df_nequa
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
def finale_merge(data, new_data, produit_id, class_id):
|
| 108 |
-
|
| 109 |
-
merged_df = pd.merge(data, new_data, on=["COUNTRY_KEY", produit_id], how="left", suffixes=("", "_y"))
|
| 110 |
-
merged_df[class_id] = merged_df[f"{class_id}_y"].fillna(merged_df[class_id])
|
| 111 |
-
merged_df[f"{class_id[:-4]}_DESC_FR"] = merged_df[f"{class_id[:-4]}_DESC_FR_y"].fillna(merged_df[f"{class_id[:-4]}_DESC_FR"])
|
| 112 |
-
|
| 113 |
-
df_finale = merged_df[[produit_id, "COUNTRY_KEY",class_id, f"{class_id[:-4]}_DESC_FR"]]
|
| 114 |
-
|
| 115 |
-
merged = pd.merge(data, df_finale, how='outer', indicator=True)
|
| 116 |
-
|
| 117 |
-
data_finale = merged[merged['_merge'] != 'both']
|
| 118 |
-
|
| 119 |
-
data_finale = data_finale.rename(columns={'_merge': 'Changements'})
|
| 120 |
-
|
| 121 |
-
data_finale.sort_values(by =[produit_id], ascending=True, inplace =True)
|
| 122 |
-
|
| 123 |
-
data_finale["Changements"] = data_finale["Changements"].apply(lambda x : "Avant" if x == "left_only" else "Après")
|
| 124 |
-
|
| 125 |
-
data_finale = data_finale[[produit_id, "COUNTRY_KEY" , class_id, f"{class_id[:-4]}_DESC_FR", "Changements"]]
|
| 126 |
-
data_finale.drop_duplicates(inplace=True)
|
| 127 |
-
|
| 128 |
-
return data_finale, df_finale
|
| 129 |
-
|
| 130 |
-
# brouillon
|
| 131 |
-
|
| 132 |
-
def data_1_1(df_nequa, produit_id, class_id):
|
| 133 |
-
df_nequa_2 = df_nequa[(df_nequa.Countries.apply(lambda x: len(x) > 1))]
|
| 134 |
-
max_poids_index = df_nequa_2.groupby(produit_id)['Poids'].idxmax()
|
| 135 |
-
|
| 136 |
-
# Updating columns for all rows instead of iterating over unique barcodes
|
| 137 |
-
df_nequa_2.loc[:, class_id] = df_nequa_2.loc[max_poids_index, class_id].values
|
| 138 |
-
df_nequa_2.loc[:, f'{class_id[:-4]}_DESC_FR'] = df_nequa_2.loc[max_poids_index, f'{class_id[:-4]}_DESC_FR'].values
|
| 139 |
|
| 140 |
-
df_duplicate = df_nequa_2.copy()
|
| 141 |
-
df_duplicate.Countries = df_duplicate.Countries.apply(lambda x : ','.join(x))
|
| 142 |
-
new_rows = []
|
| 143 |
-
for _, row in df_duplicate.iterrows():
|
| 144 |
-
countries = row['Countries'].split(',')
|
| 145 |
-
for country in countries:
|
| 146 |
-
new_row = row.copy()
|
| 147 |
-
new_row['Countries'] = country
|
| 148 |
-
new_rows.append(new_row)
|
| 149 |
-
|
| 150 |
-
new_df = pd.DataFrame(new_rows).drop_duplicates()
|
| 151 |
-
new_df = new_df.rename(columns={'Countries': 'Country'}, errors='ignore')
|
| 152 |
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
| 159 |
|
| 160 |
-
barcodes = df_f_f[produit_id].unique()
|
| 161 |
-
max = 0
|
| 162 |
-
Barcodes = []
|
| 163 |
for barcode in barcodes:
|
| 164 |
-
items =
|
| 165 |
if len(items) == 2:
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
import pandas as pd
|
| 3 |
+
import streamlit as st
|
| 4 |
+
from App.utils.priorite_pays import dico
|
| 5 |
+
# from App.utils.divers_function import data_cleaning_func
|
| 6 |
import nltk
|
| 7 |
+
from typing import Tuple, List
|
| 8 |
+
|
| 9 |
+
nltk.download("stopwords")
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def filter_data_with_valid_keys(
|
| 13 |
+
data: pd.DataFrame,
|
| 14 |
+
product_id_col: str,
|
| 15 |
+
class_id_col: str,
|
| 16 |
+
min_product_id_length: int,
|
| 17 |
+
valid_class_id_prefixes: List[str],
|
| 18 |
+
) -> pd.DataFrame:
|
| 19 |
+
"""
|
| 20 |
+
Filter the dataframe based on product ID length and class ID prefixes.
|
| 21 |
+
|
| 22 |
+
Args:
|
| 23 |
+
data (pd.DataFrame): Input dataframe
|
| 24 |
+
product_id_col (str): Name of the product ID column
|
| 25 |
+
class_id_col (str): Name of the class ID column
|
| 26 |
+
min_product_id_length (int): Minimum length for product IDs
|
| 27 |
+
valid_class_id_prefixes (List[str]): List of valid prefixes for class IDs
|
| 28 |
+
|
| 29 |
+
Returns:
|
| 30 |
+
pd.DataFrame: Filtered dataframe
|
| 31 |
+
"""
|
| 32 |
+
filtered_data = data[data[product_id_col].str.len() > min_product_id_length]
|
| 33 |
+
try:
|
| 34 |
+
filtered_data = filtered_data[
|
| 35 |
+
~filtered_data[class_id_col].str[0].isin(valid_class_id_prefixes)
|
| 36 |
+
]
|
| 37 |
+
except Exception:
|
| 38 |
pass
|
| 39 |
+
return filtered_data
|
|
|
|
| 40 |
|
|
|
|
| 41 |
|
| 42 |
+
@st.cache_data
|
| 43 |
+
def calculate_product_class_matrix(
|
| 44 |
+
data: pd.DataFrame, product_id_col: str, class_id_col: str
|
| 45 |
+
) -> Tuple[pd.DataFrame, pd.DataFrame]:
|
| 46 |
+
"""
|
| 47 |
+
Calculate the product-class matrix and total counts per product.
|
| 48 |
+
|
| 49 |
+
Args:
|
| 50 |
+
data (pd.DataFrame): Input dataframe
|
| 51 |
+
product_id_col (str): Name of the product ID column
|
| 52 |
+
class_id_col (str): Name of the class ID column
|
| 53 |
+
|
| 54 |
+
Returns:
|
| 55 |
+
Tuple[pd.DataFrame, pd.DataFrame]: Total counts per product and product-class matrix
|
| 56 |
+
"""
|
| 57 |
+
matrix = pd.crosstab(data[product_id_col], data[class_id_col])
|
| 58 |
+
total_by_product = matrix.sum(axis=1)
|
| 59 |
+
|
| 60 |
+
products_with_multiple_classes = total_by_product[total_by_product > 1].index
|
| 61 |
+
filtered_data = data[data[product_id_col].isin(products_with_multiple_classes)]
|
| 62 |
+
matrix = pd.crosstab(filtered_data[product_id_col], filtered_data[class_id_col])
|
| 63 |
+
|
| 64 |
+
total_by_product = matrix.sum(axis=1)
|
| 65 |
+
total_by_product_df = pd.DataFrame(
|
| 66 |
+
{
|
| 67 |
+
product_id_col: total_by_product.index,
|
| 68 |
+
"total_by_product": total_by_product.values,
|
| 69 |
+
}
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
return total_by_product_df, matrix
|
| 73 |
|
| 74 |
|
| 75 |
@st.cache_data
|
| 76 |
+
def create_sparse_matrix(
|
| 77 |
+
matrix: pd.DataFrame, product_id_col: str, class_id_col: str
|
| 78 |
+
) -> pd.DataFrame:
|
| 79 |
+
"""
|
| 80 |
+
Create a sparse matrix representation from the product-class matrix.
|
| 81 |
+
|
| 82 |
+
Args:
|
| 83 |
+
matrix (pd.DataFrame): Product-class matrix
|
| 84 |
+
product_id_col (str): Name of the product ID column
|
| 85 |
+
class_id_col (str): Name of the class ID column
|
| 86 |
+
|
| 87 |
+
Returns:
|
| 88 |
+
pd.DataFrame: Sparse matrix representation
|
| 89 |
+
"""
|
| 90 |
+
stacked = matrix.stack()
|
| 91 |
non_zero = stacked[stacked != 0]
|
| 92 |
+
sparse_matrix = pd.DataFrame(
|
| 93 |
+
{
|
| 94 |
+
product_id_col: non_zero.index.get_level_values(0).astype(str),
|
| 95 |
+
class_id_col: non_zero.index.get_level_values(1).astype(str),
|
| 96 |
+
"count": non_zero.values,
|
| 97 |
+
}
|
| 98 |
+
)
|
| 99 |
return sparse_matrix
|
| 100 |
|
| 101 |
|
| 102 |
@st.cache_data
|
| 103 |
+
def process_new_data(
|
| 104 |
+
data: pd.DataFrame, product_id_col: str, class_id_col: str
|
| 105 |
+
) -> Tuple[pd.Series, pd.DataFrame]:
|
| 106 |
+
"""
|
| 107 |
+
Process the data to create a new dataset with country groups and merged information.
|
| 108 |
+
|
| 109 |
+
Args:
|
| 110 |
+
data (pd.DataFrame): Input dataframe
|
| 111 |
+
product_id_col (str): Name of the product ID column
|
| 112 |
+
class_id_col (str): Name of the class ID column
|
| 113 |
+
|
| 114 |
+
Returns:
|
| 115 |
+
Tuple[pd.Series, pd.DataFrame]: Country groups and merged dataframe
|
| 116 |
+
"""
|
| 117 |
+
total_by_product_df, matrix = calculate_product_class_matrix(
|
| 118 |
+
data, product_id_col, class_id_col
|
| 119 |
+
)
|
| 120 |
+
sparse_matrix = create_sparse_matrix(matrix, product_id_col, class_id_col)
|
| 121 |
+
merged_data = pd.merge(sparse_matrix, total_by_product_df, on=[product_id_col])
|
| 122 |
+
merged_data["Proportion"] = merged_data["count"] / merged_data["total_by_product"]
|
| 123 |
+
final_merged = merged_data.merge(
|
| 124 |
+
data,
|
| 125 |
+
left_on=[class_id_col, product_id_col],
|
| 126 |
+
right_on=[class_id_col, product_id_col],
|
| 127 |
+
)
|
| 128 |
+
try:
|
| 129 |
+
country_groups = final_merged.groupby([class_id_col, product_id_col])[
|
| 130 |
+
"Country"
|
| 131 |
+
].agg(lambda x: x.tolist())
|
| 132 |
+
except KeyError:
|
| 133 |
+
try:
|
| 134 |
+
country_groups = final_merged.groupby([class_id_col, product_id_col])[
|
| 135 |
+
"COUNTRY_KEY"
|
| 136 |
+
].agg(lambda x: x.tolist())
|
| 137 |
+
except KeyError:
|
| 138 |
+
country_groups = final_merged.groupby([class_id_col, product_id_col])[
|
| 139 |
+
"COUNTRY"
|
| 140 |
+
].agg(lambda x: x.tolist())
|
| 141 |
+
return country_groups, final_merged
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def add_country(produit_id: str, class_id: str, Country) -> List[str]:
|
| 145 |
+
"""
|
| 146 |
+
Retrieve the list of countries for a given product ID and class ID.
|
| 147 |
+
|
| 148 |
+
Args:
|
| 149 |
+
product_id (str): The product ID
|
| 150 |
+
class_id (str): The class ID
|
| 151 |
+
country_groups (pd.Series): Series containing country groups
|
| 152 |
+
|
| 153 |
+
Returns:
|
| 154 |
+
List[str]: List of countries for the given product and class
|
| 155 |
+
"""
|
| 156 |
return Country[produit_id, class_id]
|
| 157 |
|
| 158 |
|
| 159 |
+
def finalize_merged_data(
|
| 160 |
+
merged: pd.DataFrame,
|
| 161 |
+
country_groups: pd.Series,
|
| 162 |
+
product_id_col: str,
|
| 163 |
+
class_id_col: str,
|
| 164 |
+
) -> pd.DataFrame:
|
| 165 |
+
"""
|
| 166 |
+
Finalize the merged data by adding country information and removing duplicates.
|
| 167 |
+
|
| 168 |
+
Args:
|
| 169 |
+
merged (pd.DataFrame): Merged dataframe
|
| 170 |
+
country_groups (pd.Series): Series containing country groups
|
| 171 |
+
product_id_col (str): Name of the product ID column
|
| 172 |
+
class_id_col (str): Name of the class ID column
|
| 173 |
+
|
| 174 |
+
Returns:
|
| 175 |
+
pd.DataFrame: Finalized merged dataframe
|
| 176 |
+
"""
|
| 177 |
try:
|
| 178 |
+
merged["Countries"] = merged.apply(
|
| 179 |
+
lambda row: add_country(
|
| 180 |
+
row[1], row[0], country_groups
|
| 181 |
+
),
|
| 182 |
+
axis=1,
|
| 183 |
+
)
|
| 184 |
+
merged["Countries"] = merged["Countries"].apply(tuple)
|
| 185 |
+
final_merged = merged.drop_duplicates(
|
| 186 |
+
subset=[product_id_col, class_id_col, "Countries"]
|
| 187 |
+
)
|
| 188 |
except Exception as e:
|
| 189 |
+
st.warning(f"An error occurred: {e}")
|
| 190 |
+
final_merged = None
|
| 191 |
+
return final_merged
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
def filter_by_country_and_proportion(
|
| 195 |
+
merged_data: pd.DataFrame,
|
| 196 |
+
min_countries: int,
|
| 197 |
+
min_proportion: float,
|
| 198 |
+
product_id_col: str,
|
| 199 |
+
) -> pd.DataFrame:
|
| 200 |
+
"""
|
| 201 |
+
Filter the merged data based on minimum number of countries and proportion.
|
| 202 |
+
|
| 203 |
+
Args:
|
| 204 |
+
merged_data (pd.DataFrame): Merged dataframe
|
| 205 |
+
min_countries (int): Minimum number of countries required
|
| 206 |
+
min_proportion (float): Minimum proportion required
|
| 207 |
+
product_id_col (str): Name of the product ID column
|
| 208 |
+
|
| 209 |
+
Returns:
|
| 210 |
+
pd.DataFrame: Filtered dataframe
|
| 211 |
+
"""
|
| 212 |
+
filtered_data = merged_data[
|
| 213 |
+
(merged_data.Proportion >= min_proportion)
|
| 214 |
+
& (merged_data.total_by_product >= min_countries)
|
| 215 |
+
]
|
| 216 |
+
product_keys = filtered_data[product_id_col].unique()
|
| 217 |
+
result_df = merged_data[merged_data[product_id_col].isin(product_keys)]
|
| 218 |
+
return result_df
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def process_country_priority(
|
| 222 |
+
merged_data: pd.DataFrame, product_id_col: str
|
| 223 |
+
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
|
| 224 |
+
"""
|
| 225 |
+
Process the merged data based on country priority.
|
| 226 |
+
|
| 227 |
+
Args:
|
| 228 |
+
merged_data (pd.DataFrame): Merged dataframe
|
| 229 |
+
product_id_col (str): Name of the product ID column
|
| 230 |
+
|
| 231 |
+
Returns:
|
| 232 |
+
Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: Processed dataframes (all, equal weight, non-equal weight)
|
| 233 |
+
"""
|
| 234 |
+
data = merged_data[
|
| 235 |
+
(merged_data.Proportion == 0.5) & (merged_data.total_by_product >= 2)
|
| 236 |
+
]
|
| 237 |
+
product_keys = data[product_id_col].unique()
|
| 238 |
+
df = merged_data[merged_data[product_id_col].isin(product_keys)]
|
| 239 |
+
|
| 240 |
+
df["Weight"] = df["Countries"].apply(lambda x: sum(dico[y] for y in x))
|
| 241 |
+
|
| 242 |
+
duplicated_subclass = df.duplicated(subset=[product_id_col, "Weight"], keep=False)
|
| 243 |
+
df_equal = df[duplicated_subclass & (df.Proportion == 0.5)]
|
| 244 |
+
df_not_equal = df[~df.isin(df_equal)].dropna()
|
| 245 |
+
|
| 246 |
+
return df, df_equal, df_not_equal
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def merge_final_data(
|
| 250 |
+
original_data: pd.DataFrame,
|
| 251 |
+
new_data: pd.DataFrame,
|
| 252 |
+
product_id_col: str,
|
| 253 |
+
class_id_col: str,
|
| 254 |
+
) -> Tuple[pd.DataFrame, pd.DataFrame]:
|
| 255 |
+
"""
|
| 256 |
+
Merge the original data with the new processed data.
|
| 257 |
+
|
| 258 |
+
Args:
|
| 259 |
+
original_data (pd.DataFrame): Original dataframe
|
| 260 |
+
new_data (pd.DataFrame): New processed dataframe
|
| 261 |
+
product_id_col (str): Name of the product ID column
|
| 262 |
+
class_id_col (str): Name of the class ID column
|
| 263 |
+
|
| 264 |
+
Returns:
|
| 265 |
+
Tuple[pd.DataFrame, pd.DataFrame]: Final merged data and changes summary
|
| 266 |
+
"""
|
| 267 |
+
merged_df = pd.merge(
|
| 268 |
+
original_data,
|
| 269 |
+
new_data,
|
| 270 |
+
on=["COUNTRY_KEY", product_id_col],
|
| 271 |
+
how="left",
|
| 272 |
+
suffixes=("", "_y"),
|
| 273 |
+
)
|
| 274 |
+
merged_df[class_id_col] = merged_df[f"{class_id_col}_y"].fillna(
|
| 275 |
+
merged_df[class_id_col]
|
| 276 |
+
)
|
| 277 |
+
merged_df[f"{class_id_col[:-4]}_DESC_FR"] = merged_df[
|
| 278 |
+
f"{class_id_col[:-4]}_DESC_FR_y"
|
| 279 |
+
].fillna(merged_df[f"{class_id_col[:-4]}_DESC_FR"])
|
| 280 |
+
|
| 281 |
+
df_final = merged_df[
|
| 282 |
+
[product_id_col, "COUNTRY_KEY", class_id_col, f"{class_id_col[:-4]}_DESC_FR"]
|
| 283 |
+
]
|
| 284 |
+
|
| 285 |
+
merged = pd.merge(original_data, df_final, how="outer", indicator=True)
|
| 286 |
+
data_final = merged[merged["_merge"] != "both"]
|
| 287 |
+
data_final = data_final.rename(columns={"_merge": "Changes"})
|
| 288 |
+
data_final.sort_values(by=[product_id_col], ascending=True, inplace=True)
|
| 289 |
+
data_final["Changes"] = data_final["Changes"].apply(
|
| 290 |
+
lambda x: "Before" if x == "left_only" else "After"
|
| 291 |
+
)
|
| 292 |
+
data_final = data_final[
|
| 293 |
+
[
|
| 294 |
+
product_id_col,
|
| 295 |
+
"COUNTRY_KEY",
|
| 296 |
+
class_id_col,
|
| 297 |
+
f"{class_id_col[:-4]}_DESC_FR",
|
| 298 |
+
"Changes",
|
| 299 |
+
]
|
| 300 |
+
]
|
| 301 |
+
data_final.drop_duplicates(inplace=True)
|
| 302 |
+
|
| 303 |
+
return data_final, df_final
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def process_non_equal_data(
|
| 307 |
+
df_not_equal: pd.DataFrame, product_id_col: str, class_id_col: str
|
| 308 |
+
) -> pd.DataFrame:
|
| 309 |
+
"""
|
| 310 |
+
Process data with non-equal weights, selecting the classification with the highest weight.
|
| 311 |
+
|
| 312 |
+
Args:
|
| 313 |
+
df_not_equal (pd.DataFrame): Dataframe with non-equal weights
|
| 314 |
+
product_id_col (str): Name of the product ID column
|
| 315 |
+
class_id_col (str): Name of the class ID column
|
| 316 |
+
|
| 317 |
+
Returns:
|
| 318 |
+
pd.DataFrame: Processed dataframe with selected classifications
|
| 319 |
+
"""
|
| 320 |
+
df_multi_country = df_not_equal[df_not_equal.Countries.apply(len) > 1]
|
| 321 |
+
max_weight_index = df_multi_country.groupby(product_id_col)["Weight"].idxmax()
|
| 322 |
+
|
| 323 |
+
df_multi_country.loc[:, [class_id_col, f"{class_id_col[:-4]}_DESC_FR"]] = (
|
| 324 |
+
df_multi_country.loc[
|
| 325 |
+
max_weight_index, [class_id_col, f"{class_id_col[:-4]}_DESC_FR"]
|
| 326 |
+
].values
|
| 327 |
+
)
|
| 328 |
+
|
| 329 |
+
df_duplicate = df_multi_country.copy()
|
| 330 |
+
df_duplicate.Countries = df_duplicate.Countries.str.join(",")
|
| 331 |
+
|
| 332 |
+
new_df = (
|
| 333 |
+
df_duplicate.explode("Countries")
|
| 334 |
+
.rename(columns={"Countries": "Country"})
|
| 335 |
+
.drop_duplicates()
|
| 336 |
+
)
|
| 337 |
|
| 338 |
+
return new_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
|
| 341 |
+
def process_france_data(
|
| 342 |
+
df: pd.DataFrame, product_id_col: str, class_id_col: str
|
| 343 |
+
) -> pd.DataFrame:
|
| 344 |
+
"""
|
| 345 |
+
Process data specific to France, handling special cases for item keys.
|
| 346 |
|
| 347 |
+
Args:
|
| 348 |
+
df (pd.DataFrame): Input dataframe
|
| 349 |
+
product_id_col (str): Name of the product ID column
|
| 350 |
+
class_id_col (str): Name of the class ID column
|
| 351 |
|
| 352 |
+
Returns:
|
| 353 |
+
pd.DataFrame: Processed dataframe for France
|
| 354 |
+
"""
|
| 355 |
+
df_france = df[df.Country == "FRA"]
|
| 356 |
+
barcodes = df_france[product_id_col].unique()
|
| 357 |
|
|
|
|
|
|
|
|
|
|
| 358 |
for barcode in barcodes:
|
| 359 |
+
items = df_france.item_key[df_france[product_id_col] == barcode].tolist()
|
| 360 |
if len(items) == 2:
|
| 361 |
+
if "R" in items[0]:
|
| 362 |
+
df_france.loc[
|
| 363 |
+
(df_france[product_id_col] == barcode)
|
| 364 |
+
& (df_france.item_key == items[0]),
|
| 365 |
+
[class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
|
| 366 |
+
] = df_france.loc[
|
| 367 |
+
(df_france[product_id_col] == barcode)
|
| 368 |
+
& (df_france.item_key == items[1]),
|
| 369 |
+
[class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
|
| 370 |
+
].values
|
| 371 |
+
elif "R" in items[1]:
|
| 372 |
+
df_france.loc[
|
| 373 |
+
(df_france[product_id_col] == barcode)
|
| 374 |
+
& (df_france.item_key == items[1]),
|
| 375 |
+
[class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
|
| 376 |
+
] = df_france.loc[
|
| 377 |
+
(df_france[product_id_col] == barcode)
|
| 378 |
+
& (df_france.item_key == items[0]),
|
| 379 |
+
[class_id_col, f"{class_id_col[:-3]}_DESC_FR"],
|
| 380 |
+
].values
|
| 381 |
+
|
| 382 |
+
return df_france
|
App/utils/divers_function.py
CHANGED
|
@@ -3,7 +3,7 @@ import pandas as pd
|
|
| 3 |
import re
|
| 4 |
from sklearn.feature_extraction.text import CountVectorizer
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
-
|
| 7 |
from App.utils.standadisation import *
|
| 8 |
from nltk.corpus import stopwords
|
| 9 |
from nltk.stem import PorterStemmer
|
|
@@ -13,82 +13,123 @@ from nltk.corpus import stopwords
|
|
| 13 |
|
| 14 |
@st.cache_data
|
| 15 |
def convert_df(df):
|
| 16 |
-
return df.to_csv().encode(
|
|
|
|
| 17 |
|
| 18 |
@st.cache_data
|
| 19 |
def supprime_country(df):
|
| 20 |
-
try
|
| 21 |
-
|
| 22 |
-
except
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
return df
|
| 31 |
|
| 32 |
|
| 33 |
-
def
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
return finale_df
|
| 47 |
|
|
|
|
|
|
|
| 48 |
|
|
|
|
| 49 |
|
| 50 |
-
|
|
|
|
| 51 |
|
| 52 |
strings = strings.lower().strip()
|
| 53 |
-
strings = strings.replace('
|
| 54 |
-
strings = strings.replace(
|
| 55 |
-
strings = re.sub(r
|
| 56 |
-
text_normalized = re.sub(
|
| 57 |
|
| 58 |
return text_normalized
|
| 59 |
|
| 60 |
|
| 61 |
-
def
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
|
| 68 |
|
| 69 |
-
def
|
| 70 |
liste_stopword_unicode = [str(item) for item in liste_stopword]
|
| 71 |
-
en_stops = set(stopwords.words(
|
| 72 |
-
fr_stops = set(stopwords.words(
|
| 73 |
|
| 74 |
-
list_DESCRIPTION = strings.split(
|
| 75 |
cleaned_list = []
|
| 76 |
|
| 77 |
for ingredient in list_DESCRIPTION:
|
| 78 |
-
temp = ingredient.split(
|
| 79 |
-
cleaned_ingredient =
|
|
|
|
|
|
|
| 80 |
cleaned_list.append(cleaned_ingredient)
|
| 81 |
|
| 82 |
-
strings =
|
| 83 |
-
list_DESCRIPTION = strings.split(
|
| 84 |
cleaned_list = []
|
| 85 |
|
| 86 |
for ingredient in list_DESCRIPTION:
|
| 87 |
-
temp = ingredient.split(
|
| 88 |
-
cleaned_ingredient =
|
|
|
|
|
|
|
| 89 |
cleaned_list.append(cleaned_ingredient)
|
| 90 |
|
| 91 |
-
strings =
|
| 92 |
return strings
|
| 93 |
|
| 94 |
|
|
@@ -97,27 +138,31 @@ fr_stemmer = FrenchStemmer()
|
|
| 97 |
|
| 98 |
|
| 99 |
def stem_sentence(sentence, stemmer):
|
| 100 |
-
words = sentence.split(
|
| 101 |
stemmed_words = [stemmer.stem(word) for word in words]
|
| 102 |
-
stemmed_sentence =
|
| 103 |
return stemmed_sentence
|
| 104 |
|
| 105 |
|
| 106 |
def english_stemmer(strings):
|
| 107 |
-
list_ingredients = strings.split(
|
| 108 |
-
stemmed_list = [
|
| 109 |
-
|
|
|
|
|
|
|
| 110 |
return strings
|
| 111 |
|
| 112 |
|
| 113 |
def french_stemmer(strings):
|
| 114 |
-
list_ingredients = strings.split(
|
| 115 |
-
stemmed_list = [
|
| 116 |
-
|
|
|
|
|
|
|
| 117 |
return strings
|
| 118 |
|
| 119 |
|
| 120 |
-
def
|
| 121 |
|
| 122 |
vectorizer = CountVectorizer()
|
| 123 |
vectors = vectorizer.fit_transform([expr1, expr2])
|
|
@@ -125,35 +170,88 @@ def cosine_similarity_between_expressions(expr1, expr2):
|
|
| 125 |
|
| 126 |
return similarity[0][0]
|
| 127 |
|
| 128 |
-
def ajout_simularite(data) :
|
| 129 |
-
data["ITEM_DESC_avant_clean"] = data["ITEM_DESC_x"].apply(data_cleaning)
|
| 130 |
-
data["ITEM_DESC_apres_clean"] = data["ITEM_DESC_y"].apply(data_cleaning)
|
| 131 |
-
|
| 132 |
-
stop = stopwords.words('french')
|
| 133 |
-
data['ITEM_DESC_avant_clean'] = data['ITEM_DESC_avant_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
|
| 134 |
-
data['ITEM_DESC_apres_clean'] = data['ITEM_DESC_apres_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
|
| 135 |
-
|
| 136 |
-
stop = stopwords.words('english')
|
| 137 |
-
data['ITEM_DESC_avant_clean'] = data['ITEM_DESC_avant_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
|
| 138 |
-
data['ITEM_DESC_apres_clean'] = data['ITEM_DESC_apres_clean'].apply(lambda x: " ".join(x for x in x.split() if x not in stop))
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
-
|
| 144 |
-
data['ITEM_DESC_apres_clean'] = data['ITEM_DESC_apres_clean'].apply(standardization)
|
| 145 |
-
|
| 146 |
-
data["Cosinus similarité"] = data.apply(lambda row: cosine_similarity_between_expressions(row['ITEM_DESC_apres_clean'], row['ITEM_DESC_avant_clean']), axis=1)
|
| 147 |
-
|
| 148 |
-
return data
|
| 149 |
|
| 150 |
|
| 151 |
-
def display_data_with_download_button(
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
| 153 |
st.write("No result for the above criterion ")
|
| 154 |
-
else
|
| 155 |
st.subheader(title)
|
| 156 |
df.loc[:, "Evaluation"] = True
|
| 157 |
edited_df = st.data_editor(df)
|
| 158 |
csv_data = convert_df(edited_df)
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import re
|
| 4 |
from sklearn.feature_extraction.text import CountVectorizer
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
+
from typing import Callable
|
| 7 |
from App.utils.standadisation import *
|
| 8 |
from nltk.corpus import stopwords
|
| 9 |
from nltk.stem import PorterStemmer
|
|
|
|
| 13 |
|
| 14 |
@st.cache_data
|
| 15 |
def convert_df(df):
|
| 16 |
+
return df.to_csv().encode("utf-8")
|
| 17 |
+
|
| 18 |
|
| 19 |
@st.cache_data
|
| 20 |
def supprime_country(df):
|
| 21 |
+
try:
|
| 22 |
+
df.drop(["Country"], axis=1, inplace=True)
|
| 23 |
+
except:
|
| 24 |
+
try:
|
| 25 |
+
df.drop(["COUNTRY_KEY"], axis=1, inplace=True)
|
| 26 |
+
except:
|
| 27 |
+
try:
|
| 28 |
+
df.drop(["COUNTRY"], axis=1, inplace=True)
|
| 29 |
+
except:
|
| 30 |
+
pass
|
| 31 |
return df
|
| 32 |
|
| 33 |
|
| 34 |
+
def merge_and_update_classification(
|
| 35 |
+
main_df, update_df, product_id_col, classification_col
|
| 36 |
+
):
|
| 37 |
+
"""
|
| 38 |
+
Merge two DataFrames and update the classification based on the update_df.
|
| 39 |
+
Only rows where the classification has changed are retained.
|
| 40 |
+
|
| 41 |
+
Args:
|
| 42 |
+
main_df (pd.DataFrame): The main DataFrame containing original data.
|
| 43 |
+
update_df (pd.DataFrame): DataFrame containing updated classifications.
|
| 44 |
+
product_id_col (str): Name of the column used as the product identifier.
|
| 45 |
+
classification_col (str): Name of the classification column to be updated.
|
| 46 |
+
|
| 47 |
+
Returns:
|
| 48 |
+
pd.DataFrame: A DataFrame containing only the rows where classification was updated.
|
| 49 |
+
"""
|
| 50 |
+
# Get unique product IDs from the update DataFrame
|
| 51 |
+
update_product_ids = update_df[product_id_col].unique()
|
| 52 |
+
|
| 53 |
+
# Filter main DataFrame to include only products in the update DataFrame
|
| 54 |
+
filtered_main_df = main_df[main_df[product_id_col].isin(update_product_ids)]
|
| 55 |
+
|
| 56 |
+
# Preserve the original classification
|
| 57 |
+
original_classification_col = f"original_{classification_col}"
|
| 58 |
+
filtered_main_df[original_classification_col] = filtered_main_df[classification_col]
|
| 59 |
|
| 60 |
+
# Merge DataFrames
|
| 61 |
+
merged_df = pd.merge(
|
| 62 |
+
filtered_main_df,
|
| 63 |
+
update_df,
|
| 64 |
+
on=[product_id_col],
|
| 65 |
+
how="inner",
|
| 66 |
+
suffixes=("_main", "_update"),
|
| 67 |
+
indicator=True,
|
| 68 |
+
)
|
| 69 |
|
| 70 |
+
# Update classification, keeping original if update is NaN
|
| 71 |
+
merged_df[classification_col] = merged_df[f"{classification_col}_update"].fillna(
|
| 72 |
+
merged_df[original_classification_col]
|
| 73 |
+
)
|
| 74 |
|
| 75 |
+
# Keep only rows where classification has changed
|
| 76 |
+
updated_df = merged_df[
|
| 77 |
+
merged_df[f"{classification_col}_main"]
|
| 78 |
+
!= merged_df[f"{classification_col}_update"]
|
| 79 |
+
]
|
|
|
|
| 80 |
|
| 81 |
+
# Remove merge indicator column
|
| 82 |
+
final_df = updated_df.drop(columns=["_merge"])
|
| 83 |
|
| 84 |
+
return final_df
|
| 85 |
|
| 86 |
+
|
| 87 |
+
def data_cleaning_func(strings):
|
| 88 |
|
| 89 |
strings = strings.lower().strip()
|
| 90 |
+
strings = strings.replace("'", " ")
|
| 91 |
+
strings = strings.replace("/", " ")
|
| 92 |
+
strings = re.sub(r"[^\w\s]", " ", strings)
|
| 93 |
+
text_normalized = re.sub("[^A-Za-z ,éêèîôœàâ]+", " ", strings)
|
| 94 |
|
| 95 |
return text_normalized
|
| 96 |
|
| 97 |
|
| 98 |
+
def standardization_func(strings):
|
| 99 |
+
liste = strings.split(" ")
|
| 100 |
+
for i in range(len(liste)):
|
| 101 |
+
if liste[i] in dictionnaire.keys():
|
| 102 |
+
liste[i] = dictionnaire[liste[i]]
|
| 103 |
+
return " ".join(liste)
|
| 104 |
|
| 105 |
|
| 106 |
+
def remove_stop_words_func(strings):
|
| 107 |
liste_stopword_unicode = [str(item) for item in liste_stopword]
|
| 108 |
+
en_stops = set(stopwords.words("english") + liste_stopword_unicode)
|
| 109 |
+
fr_stops = set(stopwords.words("french") + liste_stopword_unicode)
|
| 110 |
|
| 111 |
+
list_DESCRIPTION = strings.split(" ")
|
| 112 |
cleaned_list = []
|
| 113 |
|
| 114 |
for ingredient in list_DESCRIPTION:
|
| 115 |
+
temp = ingredient.split(" ")
|
| 116 |
+
cleaned_ingredient = " ".join(
|
| 117 |
+
[word for word in temp if word.lower() not in en_stops]
|
| 118 |
+
)
|
| 119 |
cleaned_list.append(cleaned_ingredient)
|
| 120 |
|
| 121 |
+
strings = " ".join([ingredient for ingredient in cleaned_list])
|
| 122 |
+
list_DESCRIPTION = strings.split(" ")
|
| 123 |
cleaned_list = []
|
| 124 |
|
| 125 |
for ingredient in list_DESCRIPTION:
|
| 126 |
+
temp = ingredient.split(" ")
|
| 127 |
+
cleaned_ingredient = " ".join(
|
| 128 |
+
[word for word in temp if word.lower() not in fr_stops]
|
| 129 |
+
)
|
| 130 |
cleaned_list.append(cleaned_ingredient)
|
| 131 |
|
| 132 |
+
strings = " ".join([ingredient for ingredient in cleaned_list])
|
| 133 |
return strings
|
| 134 |
|
| 135 |
|
|
|
|
| 138 |
|
| 139 |
|
| 140 |
def stem_sentence(sentence, stemmer):
|
| 141 |
+
words = sentence.split(" ")
|
| 142 |
stemmed_words = [stemmer.stem(word) for word in words]
|
| 143 |
+
stemmed_sentence = " ".join(stemmed_words)
|
| 144 |
return stemmed_sentence
|
| 145 |
|
| 146 |
|
| 147 |
def english_stemmer(strings):
|
| 148 |
+
list_ingredients = strings.split(" ")
|
| 149 |
+
stemmed_list = [
|
| 150 |
+
stem_sentence(ingredient, en_stemmer) for ingredient in list_ingredients
|
| 151 |
+
]
|
| 152 |
+
strings = " ".join(stemmed_list)
|
| 153 |
return strings
|
| 154 |
|
| 155 |
|
| 156 |
def french_stemmer(strings):
|
| 157 |
+
list_ingredients = strings.split(",")
|
| 158 |
+
stemmed_list = [
|
| 159 |
+
stem_sentence(ingredient, fr_stemmer) for ingredient in list_ingredients
|
| 160 |
+
]
|
| 161 |
+
strings = " ".join(stemmed_list)
|
| 162 |
return strings
|
| 163 |
|
| 164 |
|
| 165 |
+
def cosine_similarity_func(expr1, expr2):
|
| 166 |
|
| 167 |
vectorizer = CountVectorizer()
|
| 168 |
vectors = vectorizer.fit_transform([expr1, expr2])
|
|
|
|
| 170 |
|
| 171 |
return similarity[0][0]
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
def add_text_similarity(
|
| 175 |
+
df: pd.DataFrame,
|
| 176 |
+
data_cleaning_func: Callable = data_cleaning_func,
|
| 177 |
+
remove_stop_words_func: Callable = remove_stop_words_func,
|
| 178 |
+
standardization_func: Callable = standardization_func,
|
| 179 |
+
cosine_similarity_func: Callable = cosine_similarity_func,
|
| 180 |
+
) -> pd.DataFrame:
|
| 181 |
+
"""
|
| 182 |
+
Add text similarity measures to the DataFrame based on item descriptions.
|
| 183 |
+
|
| 184 |
+
Args:
|
| 185 |
+
df (pd.DataFrame): Input DataFrame containing item descriptions.
|
| 186 |
+
data_cleaning_func (Callable): Function to clean the text data.
|
| 187 |
+
remove_stop_words_func (Callable): Function to remove stop words.
|
| 188 |
+
standardization_func (Callable): Function to standardize text.
|
| 189 |
+
cosine_similarity_func (Callable): Function to calculate cosine similarity.
|
| 190 |
+
|
| 191 |
+
Returns:
|
| 192 |
+
pd.DataFrame: DataFrame with added text similarity measures.
|
| 193 |
+
"""
|
| 194 |
+
# Clean item descriptions
|
| 195 |
+
df["ITEM_DESC_before_clean"] = df["ITEM_DESC_main"].apply(
|
| 196 |
+
data_cleaning_func
|
| 197 |
+
)
|
| 198 |
+
df["ITEM_DESC_after_clean"] = df["ITEM_DESC_update"].apply(
|
| 199 |
+
data_cleaning_func
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
+
# Remove stop words (French and English)
|
| 203 |
+
for language in ["french", "english"]:
|
| 204 |
+
stop_words = set(stopwords.words(language))
|
| 205 |
+
for col in ["ITEM_DESC_before_clean", "ITEM_DESC_after_clean"]:
|
| 206 |
+
df[col] = df[col].apply(
|
| 207 |
+
lambda x: " ".join(
|
| 208 |
+
word for word in x.split() if word.lower() not in stop_words
|
| 209 |
+
)
|
| 210 |
+
)
|
| 211 |
+
|
| 212 |
+
# Apply custom stop words removal
|
| 213 |
+
for col in ["ITEM_DESC_before_clean", "ITEM_DESC_after_clean"]:
|
| 214 |
+
df[col] = df[col].apply(remove_stop_words_func)
|
| 215 |
+
|
| 216 |
+
# Standardize text
|
| 217 |
+
for col in ["ITEM_DESC_before_clean", "ITEM_DESC_after_clean"]:
|
| 218 |
+
df[col] = df[col].apply(standardization_func)
|
| 219 |
+
|
| 220 |
+
# Calculate cosine similarity
|
| 221 |
+
df["Cosine_Similarity"] = df.apply(
|
| 222 |
+
lambda row: cosine_similarity_func(
|
| 223 |
+
row["ITEM_DESC_after_clean"], row["ITEM_DESC_before_clean"]
|
| 224 |
+
),
|
| 225 |
+
axis=1,
|
| 226 |
+
)
|
| 227 |
|
| 228 |
+
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
|
| 231 |
+
def display_data_with_download_button(
|
| 232 |
+
df,
|
| 233 |
+
title="Data without decision-making"
|
| 234 |
+
) -> None:
|
| 235 |
+
if df.empty:
|
| 236 |
st.write("No result for the above criterion ")
|
| 237 |
+
else:
|
| 238 |
st.subheader(title)
|
| 239 |
df.loc[:, "Evaluation"] = True
|
| 240 |
edited_df = st.data_editor(df)
|
| 241 |
csv_data = convert_df(edited_df)
|
| 242 |
+
try:
|
| 243 |
+
st.download_button(
|
| 244 |
+
label="Download data as CSV",
|
| 245 |
+
data=csv_data,
|
| 246 |
+
file_name=f"{title}.csv",
|
| 247 |
+
mime="text/csv",
|
| 248 |
+
key=title,
|
| 249 |
+
)
|
| 250 |
+
except:
|
| 251 |
+
st.download_button(
|
| 252 |
+
label="Download data as CSV",
|
| 253 |
+
data=csv_data,
|
| 254 |
+
file_name=f"{title}.csv",
|
| 255 |
+
mime="text/csv",
|
| 256 |
+
key=title + "1",
|
| 257 |
+
)
|
App/utils/filter_dataframe.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import pandas as pd
|
|
|
|
| 2 |
import streamlit as st
|
| 3 |
from pandas.api.types import (
|
| 4 |
is_categorical_dtype,
|
|
@@ -8,8 +9,7 @@ from pandas.api.types import (
|
|
| 8 |
)
|
| 9 |
|
| 10 |
|
| 11 |
-
|
| 12 |
-
def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
| 13 |
"""
|
| 14 |
Adds a UI on top of a dataframe to let viewers filter columns
|
| 15 |
|
|
@@ -19,7 +19,10 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 19 |
Returns:
|
| 20 |
pd.DataFrame: Filtered dataframe
|
| 21 |
"""
|
| 22 |
-
modify = st.checkbox(
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
if not modify:
|
| 25 |
return df
|
|
@@ -30,7 +33,7 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 30 |
for col in df.columns:
|
| 31 |
if is_object_dtype(df[col]):
|
| 32 |
try:
|
| 33 |
-
df[col] = pd.to_datetime(df[col])
|
| 34 |
except Exception:
|
| 35 |
pass
|
| 36 |
|
|
@@ -40,7 +43,11 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 40 |
modification_container = st.container()
|
| 41 |
|
| 42 |
with modification_container:
|
| 43 |
-
to_filter_columns = st.multiselect(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
for column in to_filter_columns:
|
| 45 |
left, right = st.columns((1, 20))
|
| 46 |
left.write("↳")
|
|
@@ -73,7 +80,12 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 73 |
),
|
| 74 |
)
|
| 75 |
if len(user_date_input) == 2:
|
| 76 |
-
user_date_input = tuple(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
start_date, end_date = user_date_input
|
| 78 |
df = df.loc[df[column].between(start_date, end_date)]
|
| 79 |
else:
|
|
@@ -81,6 +93,6 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 81 |
f"Substring or regex in {column}",
|
| 82 |
)
|
| 83 |
if user_text_input:
|
| 84 |
-
df = df[df[column].str.contains(user_text_input)]
|
| 85 |
|
| 86 |
return df
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
import streamlit as st
|
| 4 |
from pandas.api.types import (
|
| 5 |
is_categorical_dtype,
|
|
|
|
| 9 |
)
|
| 10 |
|
| 11 |
|
| 12 |
+
def filter_dataframe(df: pd.DataFrame, key: str = "filter_dataframe_on") -> pd.DataFrame:
|
|
|
|
| 13 |
"""
|
| 14 |
Adds a UI on top of a dataframe to let viewers filter columns
|
| 15 |
|
|
|
|
| 19 |
Returns:
|
| 20 |
pd.DataFrame: Filtered dataframe
|
| 21 |
"""
|
| 22 |
+
modify = st.checkbox(
|
| 23 |
+
"Add filters",
|
| 24 |
+
key=key + "checkbox"
|
| 25 |
+
)
|
| 26 |
|
| 27 |
if not modify:
|
| 28 |
return df
|
|
|
|
| 33 |
for col in df.columns:
|
| 34 |
if is_object_dtype(df[col]):
|
| 35 |
try:
|
| 36 |
+
df[col] = pd.to_datetime(df[col], format='%Y-%m-%d %H:%M:%S')
|
| 37 |
except Exception:
|
| 38 |
pass
|
| 39 |
|
|
|
|
| 43 |
modification_container = st.container()
|
| 44 |
|
| 45 |
with modification_container:
|
| 46 |
+
to_filter_columns = st.multiselect(
|
| 47 |
+
"Filter dataframe on",
|
| 48 |
+
df.columns,
|
| 49 |
+
key=key + "multiselect"
|
| 50 |
+
)
|
| 51 |
for column in to_filter_columns:
|
| 52 |
left, right = st.columns((1, 20))
|
| 53 |
left.write("↳")
|
|
|
|
| 80 |
),
|
| 81 |
)
|
| 82 |
if len(user_date_input) == 2:
|
| 83 |
+
user_date_input = tuple(
|
| 84 |
+
map(
|
| 85 |
+
pd.to_datetime,
|
| 86 |
+
user_date_input
|
| 87 |
+
)
|
| 88 |
+
)
|
| 89 |
start_date, end_date = user_date_input
|
| 90 |
df = df.loc[df[column].between(start_date, end_date)]
|
| 91 |
else:
|
|
|
|
| 93 |
f"Substring or regex in {column}",
|
| 94 |
)
|
| 95 |
if user_text_input:
|
| 96 |
+
df = df[df[column].astype(str).str.contains(user_text_input, case=False, na=False)]
|
| 97 |
|
| 98 |
return df
|
App/utils/priorite_pays.py
CHANGED
|
@@ -1,10 +1,54 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
dico = {}
|
| 9 |
for i in pays_all:
|
| 10 |
-
|
|
|
|
| 1 |
+
pays_all = [
|
| 2 |
+
"FRA",
|
| 3 |
+
"BEL",
|
| 4 |
+
"ESP",
|
| 5 |
+
"ITA",
|
| 6 |
+
"BRA",
|
| 7 |
+
"ATA",
|
| 8 |
+
"ARG",
|
| 9 |
+
"POL",
|
| 10 |
+
"ROU",
|
| 11 |
+
"BIG",
|
| 12 |
+
"SAM",
|
| 13 |
+
"UAE",
|
| 14 |
+
"SAU",
|
| 15 |
+
"KWT",
|
| 16 |
+
"OMN",
|
| 17 |
+
"BHR",
|
| 18 |
+
"QAT",
|
| 19 |
+
"JOR",
|
| 20 |
+
"EGY",
|
| 21 |
+
"ARM",
|
| 22 |
+
"UZB",
|
| 23 |
+
"IRN",
|
| 24 |
+
"KEN",
|
| 25 |
+
"GEO",
|
| 26 |
+
"LEB",
|
| 27 |
+
"UGA",
|
| 28 |
+
"PAK",
|
| 29 |
+
"IRQ",
|
| 30 |
+
"MTQ",
|
| 31 |
+
"GLP",
|
| 32 |
+
"REU",
|
| 33 |
+
"GUA",
|
| 34 |
+
"MTS",
|
| 35 |
+
"GLS",
|
| 36 |
+
"GUF",
|
| 37 |
+
"MTA",
|
| 38 |
+
"GLA",
|
| 39 |
+
"GUS",
|
| 40 |
+
"SXM",
|
| 41 |
+
"DOM",
|
| 42 |
+
"MAR",
|
| 43 |
+
"AMA",
|
| 44 |
+
"TUN",
|
| 45 |
+
"DZA",
|
| 46 |
+
"TUR",
|
| 47 |
+
"IAP",
|
| 48 |
+
"IET",
|
| 49 |
+
"TWN",
|
| 50 |
+
]
|
| 51 |
|
| 52 |
dico = {}
|
| 53 |
for i in pays_all:
|
| 54 |
+
dico[i] = len(pays_all) - pays_all.index(i)
|
App/utils/standadisation.py
CHANGED
|
@@ -1,17 +1,125 @@
|
|
| 1 |
-
dictionnaire = {
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
liste_stopword = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dictionnaire = {
|
| 2 |
+
"rg": "rouge",
|
| 3 |
+
"rges": "rouge",
|
| 4 |
+
"rge": "rouge",
|
| 5 |
+
"rse": "rose",
|
| 6 |
+
"rs": "rose",
|
| 7 |
+
"bl": "blanc",
|
| 8 |
+
"bdx": "Bordeaux",
|
| 9 |
+
"vdt": "vin de table",
|
| 10 |
+
"vdp": "vin de pays",
|
| 11 |
+
"blc": "blanc",
|
| 12 |
+
"bib": "bag in box",
|
| 13 |
+
"citr": "citron",
|
| 14 |
+
"co": "coco",
|
| 15 |
+
"gourm": "gourmand",
|
| 16 |
+
"patis": "patisserie",
|
| 17 |
+
"p'tits": "petit",
|
| 18 |
+
"p'tit": "petit",
|
| 19 |
+
"p tit": "petit",
|
| 20 |
+
"pt": "pepite",
|
| 21 |
+
"rev": "revil",
|
| 22 |
+
"succ": "sucettes",
|
| 23 |
+
"succet": "sucettes",
|
| 24 |
+
"chocohouse": "choco house",
|
| 25 |
+
"sach": "sachet",
|
| 26 |
+
"tab": "tablette",
|
| 27 |
+
"hte": "haute",
|
| 28 |
+
"spagh": "spaghetti",
|
| 29 |
+
"scht": "sachet",
|
| 30 |
+
"nr": "noir",
|
| 31 |
+
"caf": "cafe",
|
| 32 |
+
"barr": "barre",
|
| 33 |
+
"pces": "pieces",
|
| 34 |
+
"pc": "pieces",
|
| 35 |
+
"acidu": "acidule",
|
| 36 |
+
"blnc": "blanc",
|
| 37 |
+
"frui": "fruit",
|
| 38 |
+
"gourman": "gourmand",
|
| 39 |
+
"bte": "boîte",
|
| 40 |
+
"bt": "boîte",
|
| 41 |
+
"ptit": "petit",
|
| 42 |
+
"corb": "corbeil",
|
| 43 |
+
"ptits": "petit",
|
| 44 |
+
"pti": "petit",
|
| 45 |
+
"nois": "noisette",
|
| 46 |
+
"poul": "poulain",
|
| 47 |
+
"barq": "barquette",
|
| 48 |
+
"barqu": "barquette",
|
| 49 |
+
"fizz": "fizzy",
|
| 50 |
+
"st": "saint",
|
| 51 |
+
"mich": "michel",
|
| 52 |
+
"cal": "calendrier",
|
| 53 |
+
"calend": "calendrier",
|
| 54 |
+
"calendr": "calendrier",
|
| 55 |
+
"caram": "caramel",
|
| 56 |
+
"cava": "cavalier",
|
| 57 |
+
"har": "haribo",
|
| 58 |
+
"choc": "chocolat",
|
| 59 |
+
"choco": "chocolat",
|
| 60 |
+
"lt": "lait",
|
| 61 |
+
"choc'n": "chocolat noir",
|
| 62 |
+
"choc n": "chocolat noir",
|
| 63 |
+
"degust": "degustation",
|
| 64 |
+
"degus": "degustation",
|
| 65 |
+
"bis": "biscuit",
|
| 66 |
+
"coffr": "coffret",
|
| 67 |
+
"coff": "coffret",
|
| 68 |
+
"conf": "confiserie",
|
| 69 |
+
"confis": "confiserie",
|
| 70 |
+
"croco": "crocodile",
|
| 71 |
+
"dble": "double",
|
| 72 |
+
"dess": "dessert",
|
| 73 |
+
"doyp": "doypack",
|
| 74 |
+
"harib": "harib",
|
| 75 |
+
"et": "etui",
|
| 76 |
+
"exc": "excellence",
|
| 77 |
+
"excel": "excellence",
|
| 78 |
+
"frit": "friture",
|
| 79 |
+
"fritu": "friture",
|
| 80 |
+
"fritur": "friture",
|
| 81 |
+
"gd": "grand",
|
| 82 |
+
"gr": "grand",
|
| 83 |
+
"grd": "grand",
|
| 84 |
+
"grchoc": "grand chocolat",
|
| 85 |
+
"lat": "lait",
|
| 86 |
+
"ass": "assorti",
|
| 87 |
+
"assoti": "assorti",
|
| 88 |
+
"noug": "nougatine",
|
| 89 |
+
"nougat": "nougatine",
|
| 90 |
+
"scht": "sachet",
|
| 91 |
+
"sct": "secret",
|
| 92 |
+
"cho": "chocolat",
|
| 93 |
+
"bisc": "biscuit",
|
| 94 |
+
"am": "amande",
|
| 95 |
+
"liq": "liqueur",
|
| 96 |
+
"tabl": "tablette",
|
| 97 |
+
"asst": "assorti",
|
| 98 |
+
"tab": "tablette",
|
| 99 |
+
"bil": "bille",
|
| 100 |
+
"vali": "valisette",
|
| 101 |
+
"cda": "chevaliers d argouges",
|
| 102 |
+
"tub": "tubo",
|
| 103 |
+
"gril": "grille",
|
| 104 |
+
"amandesgrilles": "amandes grilles",
|
| 105 |
+
"ball": "ballotin",
|
| 106 |
+
"piecestubo": "pieces tubo",
|
| 107 |
+
}
|
| 108 |
|
| 109 |
+
liste_stopword = [
|
| 110 |
+
"oz",
|
| 111 |
+
"kg",
|
| 112 |
+
"g",
|
| 113 |
+
"lb",
|
| 114 |
+
"mg",
|
| 115 |
+
"l",
|
| 116 |
+
"cl",
|
| 117 |
+
"ml",
|
| 118 |
+
"tsp",
|
| 119 |
+
"tbsp",
|
| 120 |
+
"cm",
|
| 121 |
+
"x",
|
| 122 |
+
"cte",
|
| 123 |
+
"h",
|
| 124 |
+
"unknown",
|
| 125 |
+
]
|
app.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
import requests
|
| 3 |
|
| 4 |
# Configuration
|
| 5 |
st.set_page_config(
|
| 6 |
page_title="Recherche",
|
| 7 |
-
page_icon="images/logo.png",
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
)
|
| 12 |
change_footer_style = """
|
| 13 |
<style>
|
|
@@ -27,7 +26,6 @@ def get_product_info(EAN):
|
|
| 27 |
return {"error": "Product not found"}
|
| 28 |
|
| 29 |
|
| 30 |
-
|
| 31 |
""" Bienvenue sur notre site de web scraping dédié à la recherche d’informations sur les produits disponibles sur Open Food Facts! 🎉
|
| 32 |
|
| 33 |
Ici, vous pouvez rechercher des informations détaillées sur une multitude de produits simplement en utilisant leur code EAN. Nous nous efforçons de fournir des informations précises et à jour pour vous aider à prendre des décisions éclairées sur les produits que vous consommez.
|
|
@@ -35,7 +33,7 @@ Ici, vous pouvez rechercher des informations détaillées sur une multitude de p
|
|
| 35 |
Profitez de votre exploration! 🕵️♀️
|
| 36 |
"""
|
| 37 |
# Test de la fonction
|
| 38 |
-
EAN =st.text_input("EAN",
|
| 39 |
-
if EAN
|
| 40 |
product_info = get_product_info(EAN)
|
| 41 |
-
st.json(product_info)
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
import requests
|
| 3 |
|
| 4 |
# Configuration
|
| 5 |
st.set_page_config(
|
| 6 |
page_title="Recherche",
|
| 7 |
+
page_icon="images/logo.png",
|
| 8 |
+
layout="wide",
|
| 9 |
+
initial_sidebar_state="auto",
|
|
|
|
| 10 |
)
|
| 11 |
change_footer_style = """
|
| 12 |
<style>
|
|
|
|
| 26 |
return {"error": "Product not found"}
|
| 27 |
|
| 28 |
|
|
|
|
| 29 |
""" Bienvenue sur notre site de web scraping dédié à la recherche d’informations sur les produits disponibles sur Open Food Facts! 🎉
|
| 30 |
|
| 31 |
Ici, vous pouvez rechercher des informations détaillées sur une multitude de produits simplement en utilisant leur code EAN. Nous nous efforçons de fournir des informations précises et à jour pour vous aider à prendre des décisions éclairées sur les produits que vous consommez.
|
|
|
|
| 33 |
Profitez de votre exploration! 🕵️♀️
|
| 34 |
"""
|
| 35 |
# Test de la fonction
|
| 36 |
+
EAN = st.text_input("EAN", "0737628064502") # remplacer par l'EAN du produit
|
| 37 |
+
if EAN:
|
| 38 |
product_info = get_product_info(EAN)
|
| 39 |
+
st.json(product_info)
|
pages/🤖_Gestion_de_rupture_famille.py
CHANGED
|
@@ -1,170 +1,262 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
|
| 3 |
-
import
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
-
# Page configuration
|
| 12 |
def config_page():
|
| 13 |
st.set_page_config(
|
| 14 |
page_title="Gestion des ruptures",
|
| 15 |
page_icon="images/Carrefour_logo.png",
|
| 16 |
-
layout="wide"
|
| 17 |
)
|
| 18 |
hide_streamlit_style = """
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
| 24 |
|
| 25 |
|
| 26 |
-
def
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
with col1 :
|
| 53 |
-
min_product_id = input_box.valid_produict_id()
|
| 54 |
-
|
| 55 |
-
with col2 :
|
| 56 |
-
vaind_class_id = input_box.valid_class_id()
|
| 57 |
-
|
| 58 |
-
columns1, columns2, columns3 = st.columns(3)
|
| 59 |
-
|
| 60 |
-
with columns1:
|
| 61 |
-
nb_countries = input_box.get_number_countries()
|
| 62 |
-
|
| 63 |
-
with columns2 :
|
| 64 |
-
proportion = input_box.get_proportion()
|
| 65 |
-
|
| 66 |
-
with columns3 :
|
| 67 |
-
show_proportion = input_box.show_proportion()
|
| 68 |
-
|
| 69 |
-
# excution
|
| 70 |
-
if st.button("RUN ", key="run_button"):
|
| 71 |
-
data = data_with_valide_key(data, product_id, class_id, min_product_id, vaind_class_id )
|
| 72 |
-
Country, merged = nouvelle_data(data,
|
| 73 |
-
str(product_id),
|
| 74 |
-
str(class_id))
|
| 75 |
-
|
| 76 |
-
data_with_pro = finale_merged(merged,
|
| 77 |
-
Country,
|
| 78 |
-
product_id,
|
| 79 |
-
class_id)
|
| 80 |
-
|
| 81 |
-
if show_proportion :
|
| 82 |
-
display_data_with_download_button(data_with_pro, title="Show data with ratios")
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
"""## The data below is filtered as follows: """
|
| 86 |
-
"- Number of countries greater than or equal to ", nb_countries
|
| 87 |
-
"- The proportion with the highest ", class_id ," is greater than or equal to ",proportion
|
| 88 |
-
|
| 89 |
-
data_countries_ratio = cond_pays_proportion(data_with_pro,
|
| 90 |
-
nb_countries,
|
| 91 |
-
proportion,
|
| 92 |
-
product_id)
|
| 93 |
-
|
| 94 |
-
if data_countries_ratio.empty :
|
| 95 |
-
st.write("No result for the above criterion ")
|
| 96 |
-
|
| 97 |
-
else :
|
| 98 |
-
df = supprime_country(data_countries_ratio)
|
| 99 |
-
max_number_index = df.groupby(product_id)['nombre'].idxmax()
|
| 100 |
-
df_max_number = df.loc[max_number_index]
|
| 101 |
-
df_max_number.drop(["Countries"], axis = 1, inplace =True)
|
| 102 |
-
|
| 103 |
-
finale_df = Merger(data,
|
| 104 |
-
df_max_number,
|
| 105 |
-
product_id,
|
| 106 |
-
class_id)
|
| 107 |
|
| 108 |
-
tab1, tab2 = st.tabs(["Data without decision-making", "Data with proposed changes"])
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
-
|
| 114 |
-
display_data_with_download_button(finale_df, title="Data with proposed changes")
|
| 115 |
|
| 116 |
-
"## Country priority "
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
|
|
|
| 123 |
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
-
finale_df_1 = ajout_simularite(Merger(data,df_max_poids1, product_id, class_id))
|
| 148 |
-
display_data_with_download_button(finale_df_1, title=" One vs One with similarity score")
|
| 149 |
-
st.success('Done!', icon="✅")
|
| 150 |
-
st.balloons()
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
|
| 158 |
if __name__ == "__main__":
|
| 159 |
-
lien_label = "Example of input"
|
| 160 |
-
lien_url = "https://docs.google.com/spreadsheets/d/123hVTOFpBT-C6mCnrOBh8fFIhSi8FxiuyHZJAQu8bDc/edit#gid=1220891905"
|
| 161 |
-
lien_html = f'<a href="{lien_url}">{lien_label}</a>'
|
| 162 |
-
|
| 163 |
-
lien_label_ = "Documentation utilisateur"
|
| 164 |
-
lien_url_ = "https://docs.google.com/document/d/1WQwr5D87ZHSlBRWQw7KMbBhbEdFS4dlhltFDgZBNP4U/edit?usp=sharing"
|
| 165 |
-
lien_html_ = f'<a href="{lien_url_}">{lien_label_}</a>'
|
| 166 |
-
|
| 167 |
config_page()
|
| 168 |
-
st.sidebar.markdown(
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
app()
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from App.class_input_box.input_box import InputsBox
|
| 3 |
+
from App.functions_rupture.functions_gestion import (
|
| 4 |
+
filter_data_with_valid_keys,
|
| 5 |
+
process_new_data,
|
| 6 |
+
finalize_merged_data,
|
| 7 |
+
filter_by_country_and_proportion,
|
| 8 |
+
process_country_priority,
|
| 9 |
+
)
|
| 10 |
+
from App.utils.divers_function import (
|
| 11 |
+
display_data_with_download_button,
|
| 12 |
+
supprime_country,
|
| 13 |
+
merge_and_update_classification,
|
| 14 |
+
add_text_similarity
|
| 15 |
+
)
|
| 16 |
+
from App.utils.filter_dataframe import filter_dataframe
|
| 17 |
+
import logging
|
| 18 |
+
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 19 |
|
| 20 |
|
|
|
|
| 21 |
def config_page():
|
| 22 |
st.set_page_config(
|
| 23 |
page_title="Gestion des ruptures",
|
| 24 |
page_icon="images/Carrefour_logo.png",
|
| 25 |
+
layout="wide",
|
| 26 |
)
|
| 27 |
hide_streamlit_style = """
|
| 28 |
+
<style>
|
| 29 |
+
footer {visibility: hidden;}
|
| 30 |
+
</style>
|
| 31 |
+
"""
|
| 32 |
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
| 33 |
|
| 34 |
|
| 35 |
+
def display_filters(input_box):
|
| 36 |
+
col1, col2 = st.columns(2)
|
| 37 |
+
with col1:
|
| 38 |
+
product_id = input_box.get_product_id()
|
| 39 |
+
min_product_id = input_box.valid_produict_id()
|
| 40 |
+
with col2:
|
| 41 |
+
class_id = input_box.get_class_id()
|
| 42 |
+
valid_class_id = input_box.valid_class_id()
|
| 43 |
+
|
| 44 |
+
col1, col2, col3 = st.columns(3)
|
| 45 |
+
with col1:
|
| 46 |
+
nb_countries = input_box.get_number_countries()
|
| 47 |
+
with col2:
|
| 48 |
+
proportion = input_box.get_proportion()
|
| 49 |
+
with col3:
|
| 50 |
+
show_proportion = input_box.show_proportion()
|
| 51 |
+
|
| 52 |
+
return (
|
| 53 |
+
product_id,
|
| 54 |
+
class_id,
|
| 55 |
+
min_product_id,
|
| 56 |
+
valid_class_id,
|
| 57 |
+
nb_countries,
|
| 58 |
+
proportion,
|
| 59 |
+
show_proportion,
|
| 60 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
|
|
|
| 62 |
|
| 63 |
+
def process_data(
|
| 64 |
+
data,
|
| 65 |
+
product_id,
|
| 66 |
+
class_id,
|
| 67 |
+
min_product_id,
|
| 68 |
+
valid_class_id,
|
| 69 |
+
nb_countries,
|
| 70 |
+
proportion,
|
| 71 |
+
show_proportion,
|
| 72 |
+
):
|
| 73 |
+
logging.debug(f"Starting process_data with product_id: {product_id}, class_id: {class_id}")
|
| 74 |
+
data = filter_data_with_valid_keys(
|
| 75 |
+
data, product_id, class_id, min_product_id, valid_class_id
|
| 76 |
+
)
|
| 77 |
+
# st.dataframe(data)
|
| 78 |
+
logging.debug("Data filtered with valid keys")
|
| 79 |
+
Country, merged = process_new_data(data, str(product_id), class_id)
|
| 80 |
+
logging.debug("New data processed")
|
| 81 |
+
data_with_pro = finalize_merged_data(merged, Country, product_id, class_id)
|
| 82 |
+
# st.dataframe(data_with_pro)
|
| 83 |
+
logging.debug("Merged data finalized")
|
| 84 |
+
if show_proportion:
|
| 85 |
+
logging.info("ibra 1 ")
|
| 86 |
+
display_data_with_download_button(data_with_pro, title="Show data with ratios")
|
| 87 |
+
logging.info("ibra 2")
|
| 88 |
+
|
| 89 |
+
st.write("## The data below is filtered as follows:")
|
| 90 |
+
st.write(f"- Number of countries greater than or equal to {nb_countries}")
|
| 91 |
+
st.write(
|
| 92 |
+
f"- The proportion with the highest {class_id} is greater than or equal to {proportion}"
|
| 93 |
+
)
|
| 94 |
+
data_countries_ratio = filter_by_country_and_proportion(
|
| 95 |
+
data_with_pro, nb_countries, proportion, product_id
|
| 96 |
+
)
|
| 97 |
+
if data_countries_ratio.empty:
|
| 98 |
+
st.write("No result for the above criterion")
|
| 99 |
+
else:
|
| 100 |
+
display_filtered_data(data, data_countries_ratio, product_id, class_id)
|
| 101 |
|
| 102 |
+
display_country_priority(data_with_pro, data, product_id, class_id)
|
|
|
|
| 103 |
|
|
|
|
| 104 |
|
| 105 |
+
def display_filtered_data(data, data_countries_ratio, product_id, class_id):
|
| 106 |
+
df = supprime_country(data_countries_ratio)
|
| 107 |
+
max_number_index = df.groupby(product_id)["count"].idxmax()
|
| 108 |
+
df_max_number = df.loc[max_number_index]
|
| 109 |
+
df_max_number.drop(["Countries"], axis=1, inplace=True)
|
| 110 |
|
| 111 |
+
finale_df = merge_and_update_classification(
|
| 112 |
+
data, df_max_number, product_id, class_id
|
| 113 |
+
)
|
| 114 |
|
| 115 |
+
tab1, tab2 = st.tabs(
|
| 116 |
+
["Data without decision-making", "Data with proposed changes"]
|
| 117 |
+
)
|
| 118 |
+
with tab1:
|
| 119 |
+
display_data_with_download_button(
|
| 120 |
+
df,
|
| 121 |
+
title="Data without decision-making"
|
| 122 |
+
)
|
| 123 |
+
with tab2:
|
| 124 |
+
display_data_with_download_button(
|
| 125 |
+
finale_df,
|
| 126 |
+
title="Data with proposed changes"
|
| 127 |
+
)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def display_country_priority(data_with_pro, data, product_id, class_id):
    """Render the 'Country priority' section as four tabs plus a 1-vs-1 view."""
    st.write("## Country priority")
    priority_data, df_equa, df_nequa = process_country_priority(
        data_with_pro, product_id
    )

    # Products whose classification is seen in more than two countries.
    df_nequa_ = df_nequa[df_nequa.total_by_product.apply(lambda x: int(x) > 2)]

    tabs = st.tabs(
        [
            "Data without decision-making",
            "Equality case and more than 1",
            "Cases of inequality",
            "Data with proposed changes more than 2",
        ]
    )
    with tabs[0]:
        display_data_with_download_button(
            priority_data, title="Data without decision-making"
        )
    with tabs[1]:
        display_data_with_download_button(df_equa, title="Equality case")
    with tabs[2]:
        display_data_with_download_button(
            df_nequa_,
            title="Cases of inequality",
        )
    with tabs[3]:
        display_proposed_changes(df_nequa_, data, product_id, class_id)

    display_one_vs_one(df_nequa, data, product_id, class_id)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def display_proposed_changes(df_nequa_, data, product_id, class_id):
    """Propose, per product, the classification carried by the highest Weight.

    Applies to products present in more than two countries; the winning row
    is merged back into the raw data and shown with a download button.
    """
    # Row index of the maximum Weight for each product.
    max_poids_index = df_nequa_.groupby(product_id)["Weight"].idxmax()
    # `.drop()` returns a new frame: avoids mutating the `.loc` slice in
    # place (pandas SettingWithCopyWarning on a filtered frame).
    df_max_poids = df_nequa_.loc[max_poids_index].drop(columns=["COUNTRY_KEY"])
    finale_df_ = merge_and_update_classification(
        data, df_max_poids, product_id, class_id
    )
    display_data_with_download_button(
        finale_df_, title="Data with proposed changes more than 2"
    )
|
| 176 |
|
| 177 |
|
| 178 |
+
def display_one_vs_one(df_nequa, data, product_id, class_id):
    """Handle products present in exactly two countries.

    Keeps the classification with the higher Weight and attaches a
    text-similarity score so the conflict can be reviewed manually.
    """
    # One-vs-one conflicts: product appears in exactly two countries.
    df_nequa_1 = df_nequa[df_nequa.total_by_product.apply(lambda x: int(x) == 2)]
    max_poids_index1 = df_nequa_1.groupby(product_id)["Weight"].idxmax()
    # `.drop()` returns a new frame instead of mutating the `.loc` slice
    # in place (avoids pandas' SettingWithCopyWarning).
    df_max_poids1 = df_nequa_1.loc[max_poids_index1].drop(columns=["COUNTRY_KEY"])

    finale_df_1 = add_text_similarity(
        merge_and_update_classification(data, df_max_poids1, product_id, class_id)
    )
    display_data_with_download_button(
        finale_df_1, title=" One vs One with similarity score"
    )
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
+
def app():
    """Entry point for the 'Gestion des ruptures famille' page.

    Loads the uploaded file, shows it with interactive filters, collects
    the processing parameters, and runs the pipeline on RUN.
    """
    logging.info("Starting app function")
    st.title("Gestion des ruptures famille")
    input_box = InputsBox()
    data = input_box.get_data()
    # Lazy %-style args: formatted only if the DEBUG level is enabled.
    logging.debug("Data retrieved: %s", data is not None)

    if data is not None and data.shape[0] != 0:
        logging.info("Data is valid, proceeding with processing")
        st.header("Data")
        st.dataframe(filter_dataframe(data))

        st.header("Parameters")
        (
            product_id,
            class_id,
            min_product_id,
            valid_class_id,
            nb_countries,
            proportion,
            show_proportion,
        ) = display_filters(input_box)

        # Optional extra restriction coming from the holding-level filter
        # widget; an empty selection means "keep everything".
        list_product_selected = (
            filter_dataframe(data, "data_filter_by_holding")[product_id]
            .unique()
            .tolist()
        )
        if list_product_selected:  # truthy iff at least one product picked
            data_selected = data[data[product_id].isin(list_product_selected)]
        else:
            # Typo fixed ("No addictionnal filter selecting") and wording
            # aligned with the sous-famille page.
            st.warning("No additional filter selected")
            data_selected = data.copy()

        if st.button("RUN", key="run_button"):
            try:
                process_data(
                    data_selected,
                    product_id,
                    class_id,
                    min_product_id,
                    valid_class_id,
                    nb_countries,
                    proportion,
                    show_proportion,
                )
                st.success("Done!", icon="✅")
                st.balloons()
            except Exception as e:
                # Surface pipeline failures to the user instead of crashing.
                st.error(f"An error occurred: {str(e)}", icon="🚨")
    else:
        logging.warning("Data is None or empty")
        st.info(
            """Ensure that column names are capitalized and that product_id
            and class_id descriptions are present, as well as a country
            column.""",
            icon="ℹ️",
        )
    logging.info("App function completed")
|
| 250 |
|
| 251 |
|
| 252 |
if __name__ == "__main__":
    # Configure the page first (must precede any other st.* call), expose
    # the reference links in the sidebar, then run the app.
    config_page()
    sidebar_links = [
        (
            "Documentation utilisateur",
            "https://docs.google.com/document/d/1WQwr5D87ZHSlBRWQw7KMbBhbEdFS4dlhltFDgZBNP4U/edit?usp=sharing",
        ),
        (
            "Example of input",
            "https://docs.google.com/spreadsheets/d/123hVTOFpBT-C6mCnrOBh8fFIhSi8FxiuyHZJAQu8bDc/edit#gid=1220891905",
        ),
    ]
    for label, url in sidebar_links:
        st.sidebar.markdown(
            f'<a href="{url}">{label}</a>',
            unsafe_allow_html=True,
        )
    app()
|
pages/🦾_Gestion_de_rupture_sous_famille.py
CHANGED
|
@@ -1,172 +1,261 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
|
| 3 |
-
import
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
# Page configuration
|
| 12 |
-
st.set_page_config(
|
| 13 |
-
page_title="Gestion des ruptures",
|
| 14 |
-
page_icon="images/Carrefour_logo.png",
|
| 15 |
-
layout="wide"
|
| 16 |
)
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
def app():
|
| 26 |
-
st.title("Gestion des ruptures sous famille")
|
| 27 |
-
|
| 28 |
-
input_box = InputsBox()
|
| 29 |
-
|
| 30 |
-
data = input_box.get_data()
|
| 31 |
-
|
| 32 |
-
try:
|
| 33 |
-
if data.shape[0] != 0 :
|
| 34 |
-
st.header("Data")
|
| 35 |
-
|
| 36 |
-
st.dataframe(filter_dataframe(data))
|
| 37 |
-
|
| 38 |
-
"## Parameters"
|
| 39 |
-
|
| 40 |
-
col1, col2 = st.columns(2)
|
| 41 |
-
|
| 42 |
-
with col1 :
|
| 43 |
-
product_id = input_box.get_product_id()
|
| 44 |
-
|
| 45 |
-
with col2 :
|
| 46 |
-
class_id = input_box.get_class_id()
|
| 47 |
-
|
| 48 |
-
'## Filters'
|
| 49 |
-
col1, col2 = st.columns(2)
|
| 50 |
-
|
| 51 |
-
with col1 :
|
| 52 |
-
min_product_id = input_box.valid_produict_id()
|
| 53 |
-
|
| 54 |
-
with col2 :
|
| 55 |
-
vaind_class_id = input_box.valid_class_id()
|
| 56 |
-
|
| 57 |
-
columns1, columns2, columns3, columns4 = st.columns(4)
|
| 58 |
-
|
| 59 |
-
with columns1:
|
| 60 |
-
nb_countries = input_box.get_number_countries()
|
| 61 |
-
|
| 62 |
-
with columns2 :
|
| 63 |
-
proportion = input_box.get_proportion()
|
| 64 |
-
|
| 65 |
-
with columns3 :
|
| 66 |
-
countries = input_box.get_countries()
|
| 67 |
-
|
| 68 |
-
with columns4 :
|
| 69 |
-
show_proportion = input_box.show_proportion()
|
| 70 |
-
|
| 71 |
-
#execution
|
| 72 |
-
if st.button("RUN ", key="run_button"):
|
| 73 |
-
data = data_with_valide_key(data, product_id, class_id, min_product_id, vaind_class_id )
|
| 74 |
-
data = data[data.COUNTRY_KEY.isin(countries)]
|
| 75 |
-
Country, merged = nouvelle_data(data,
|
| 76 |
-
str(product_id),
|
| 77 |
-
str(class_id))
|
| 78 |
-
|
| 79 |
-
data_with_pro = finale_merged(merged,
|
| 80 |
-
Country,
|
| 81 |
-
product_id,
|
| 82 |
-
class_id)
|
| 83 |
-
|
| 84 |
-
if show_proportion :
|
| 85 |
-
display_data_with_download_button(data_with_pro, title="Show data with ratios")
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
"""## The data below is filtered as follows: """
|
| 89 |
-
"- Number of countries greater than or equal to ", nb_countries
|
| 90 |
-
"- The proportion with the highest ", class_id ," is greater than or equal to ",proportion
|
| 91 |
-
|
| 92 |
-
data_countries_ratio = cond_pays_proportion(data_with_pro,
|
| 93 |
-
nb_countries,
|
| 94 |
-
proportion,
|
| 95 |
-
product_id)
|
| 96 |
-
|
| 97 |
-
if data_countries_ratio.empty :
|
| 98 |
-
st.write("No result for the above criterion ")
|
| 99 |
-
|
| 100 |
-
else :
|
| 101 |
-
df = supprime_country(data_countries_ratio)
|
| 102 |
-
max_number_index = df.groupby(product_id)['nombre'].idxmax()
|
| 103 |
-
df_max_number = df.loc[max_number_index]
|
| 104 |
-
df_max_number.drop(["Countries"], axis = 1, inplace =True)
|
| 105 |
-
|
| 106 |
-
finale_df = Merger(data,
|
| 107 |
-
df_max_number,
|
| 108 |
-
product_id,
|
| 109 |
-
class_id)
|
| 110 |
-
|
| 111 |
-
tab1, tab2 = st.tabs(["Data without decision-making", "Data with proposed changes"])
|
| 112 |
-
|
| 113 |
-
with tab1 :
|
| 114 |
-
display_data_with_download_button(df, title="Data without decision-making")
|
| 115 |
-
|
| 116 |
-
with tab2 :
|
| 117 |
-
display_data_with_download_button(finale_df, title="Data with proposed changes")
|
| 118 |
-
|
| 119 |
-
"## Country priority "
|
| 120 |
-
|
| 121 |
-
priority_data, df_equa, df_nequa = cond_pays_priorite(data_with_pro, product_id)
|
| 122 |
-
tab1, tab2, tab3, tab4 = st.tabs(["Data without decision-making", "Equality case and mt1", "Cases of inequality", "Data with proposed changes mt2"])
|
| 123 |
-
|
| 124 |
-
with tab1 :
|
| 125 |
-
display_data_with_download_button(priority_data, title="Data without decision-making")
|
| 126 |
-
|
| 127 |
-
with tab2 :
|
| 128 |
-
display_data_with_download_button(df_equa, title="Equality case")
|
| 129 |
-
|
| 130 |
-
with tab3 :
|
| 131 |
-
df_nequa_ = df_nequa[(df_nequa.total_by_line.apply(lambda x: int(x) > 2))]
|
| 132 |
-
display_data_with_download_button(df_nequa_, title="Cases of inequality")
|
| 133 |
-
|
| 134 |
-
max_poids_index = df_nequa_.groupby(product_id)['Poids'].idxmax()
|
| 135 |
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
finale_df_ = Merger(data,df_max_poids, product_id, class_id)
|
| 140 |
-
with tab4 :
|
| 141 |
-
display_data_with_download_button(finale_df_, title="Data with proposed changes mt2")
|
| 142 |
|
| 143 |
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
finale_df_1 = ajout_simularite(Merger(data,df_max_poids1, product_id, class_id))
|
| 151 |
-
display_data_with_download_button(finale_df_1, title=" One vs One with similarity score")
|
| 152 |
-
st.success('Done!', icon="✅")
|
| 153 |
-
st.balloons()
|
| 154 |
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
|
| 161 |
if __name__ == "__main__":
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
st.sidebar.markdown(lien_html, unsafe_allow_html=True)
|
| 172 |
app()
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from App.class_input_box.input_box import InputsBox
|
| 3 |
+
from App.functions_rupture.functions_gestion import (
|
| 4 |
+
filter_data_with_valid_keys,
|
| 5 |
+
process_new_data,
|
| 6 |
+
finalize_merged_data,
|
| 7 |
+
filter_by_country_and_proportion,
|
| 8 |
+
process_country_priority,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
)
|
| 10 |
+
from App.utils.divers_function import (
|
| 11 |
+
display_data_with_download_button,
|
| 12 |
+
supprime_country,
|
| 13 |
+
merge_and_update_classification,
|
| 14 |
+
add_text_similarity,
|
| 15 |
+
)
|
| 16 |
+
from App.utils.filter_dataframe import filter_dataframe
|
| 17 |
+
import logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
# Configure root logging once at import time; DEBUG level surfaces the
# step-by-step trace messages emitted throughout this page.
logging.basicConfig(
    level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s"
)
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
+
def config_page():
    """Set the global Streamlit page configuration.

    Must run before any other st.* call on the page. Also hides
    Streamlit's default footer via injected CSS.
    """
    st.set_page_config(
        page_title="Gestion des ruptures",
        page_icon="images/Carrefour_logo.png",
        layout="wide",
    )
    # CSS snippet rendered with unsafe_allow_html to suppress the footer.
    hide_streamlit_style = """
    <style>
    footer {visibility: hidden;}
    </style>
    """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def display_filters(input_box):
    """Render the parameter widgets and collect their values.

    Returns the tuple (product_id, class_id, min_product_id,
    valid_class_id, nb_countries, proportion, countries, show_proportion).
    """
    left, right = st.columns(2)
    with left:
        product_id = input_box.get_product_id()
        min_product_id = input_box.valid_produict_id()
    with right:
        class_id = input_box.get_class_id()
        valid_class_id = input_box.valid_class_id()

    first, second, third = st.columns(3)
    with first:
        nb_countries = input_box.get_number_countries()
    with second:
        proportion = input_box.get_proportion()
    with third:
        show_proportion = input_box.show_proportion()

    countries = input_box.get_countries()

    return (
        product_id,
        class_id,
        min_product_id,
        valid_class_id,
        nb_countries,
        proportion,
        countries,
        show_proportion,
    )
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def process_data(
    data,
    product_id,
    class_id,
    min_product_id,
    valid_class_id,
    nb_countries,
    proportion,
    countries,
    show_proportion,
):
    """Run the sous-famille pipeline: filter, merge, then render results."""
    logging.debug(
        f"Starting process_data with product_id: {product_id}, class_id: {class_id}"
    )
    # Keep only rows whose keys pass validation, restricted to the chosen
    # countries.
    valid_rows = filter_data_with_valid_keys(
        data, product_id, class_id, min_product_id, valid_class_id
    )
    data = valid_rows[valid_rows.COUNTRY_KEY.isin(countries)]
    logging.debug("Data filtered with valid keys and countries")

    country_df, merged_df = process_new_data(data, str(product_id), str(class_id))
    logging.debug("New data processed")
    data_with_pro = finalize_merged_data(merged_df, country_df, product_id, class_id)
    logging.debug("Merged data finalized")

    if show_proportion:
        logging.info("Displaying data with ratios")
        display_data_with_download_button(data_with_pro, title="Show data with ratios")

    st.write("## The data below is filtered as follows:")
    st.write(f"- Number of countries greater than or equal to {nb_countries}")
    st.write(
        f"- The proportion with the highest {class_id} is greater than or equal to {proportion}"
    )

    data_countries_ratio = filter_by_country_and_proportion(
        data_with_pro, nb_countries, proportion, product_id
    )

    if data_countries_ratio.empty:
        st.write("No result for the above criterion")
    else:
        display_filtered_data(data, data_countries_ratio, product_id, class_id)

    display_country_priority(data_with_pro, data, product_id, class_id)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def display_filtered_data(data, data_countries_ratio, product_id, class_id):
    """Show the country/ratio-filtered rows and the proposed reclassification.

    For each product, keep the classification observed in the largest number
    of countries ("count" column), merge that choice back into the raw data,
    and render both views in two tabs with download buttons.
    """
    df = supprime_country(data_countries_ratio)
    # Index of the row with the highest country count per product.
    max_number_index = df.groupby(product_id)["count"].idxmax()
    # `.drop()` returns a new frame: avoids mutating the `.loc` slice in
    # place (pandas SettingWithCopyWarning on a filtered frame).
    df_max_number = df.loc[max_number_index].drop(columns=["Countries"])

    finale_df = merge_and_update_classification(
        data, df_max_number, product_id, class_id
    )

    tab1, tab2 = st.tabs(["Data without decision-making", "Data with proposed changes"])
    with tab1:
        display_data_with_download_button(df, title="Data without decision-making")
    with tab2:
        display_data_with_download_button(finale_df, title="Data with proposed changes")
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def display_country_priority(data_with_pro, data, product_id, class_id):
    """Render the 'Country priority' section as four tabs plus a 1-vs-1 view."""
    st.write("## Country priority")
    priority_data, df_equa, df_nequa = process_country_priority(
        data_with_pro, product_id
    )

    # Products whose classification is seen in more than two countries.
    df_nequa_ = df_nequa[df_nequa.total_by_product.apply(lambda x: int(x) > 2)]

    tab_no_decision, tab_equal, tab_unequal, tab_changes = st.tabs(
        [
            "Data without decision-making",
            "Equality case and more than 1",
            "Cases of inequality",
            "Data with proposed changes more than 2",
        ]
    )
    with tab_no_decision:
        display_data_with_download_button(
            priority_data, title="Data without decision-making"
        )
    with tab_equal:
        display_data_with_download_button(df_equa, title="Equality case")
    with tab_unequal:
        display_data_with_download_button(df_nequa_, title="Cases of inequality")
    with tab_changes:
        display_proposed_changes(df_nequa_, data, product_id, class_id)

    display_one_vs_one(df_nequa, data, product_id, class_id)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def display_proposed_changes(df_nequa_, data, product_id, class_id):
    """Propose, per product, the classification carried by the highest Weight.

    Applies to products present in more than two countries; the winning row
    is merged back into the raw data and shown with a download button.
    """
    # Row index of the maximum Weight for each product.
    max_poids_index = df_nequa_.groupby(product_id)["Weight"].idxmax()
    # `.drop()` returns a new frame: avoids mutating the `.loc` slice in
    # place (pandas SettingWithCopyWarning on a filtered frame).
    df_max_poids = df_nequa_.loc[max_poids_index].drop(columns=["COUNTRY_KEY"])
    finale_df_ = merge_and_update_classification(
        data, df_max_poids, product_id, class_id
    )
    display_data_with_download_button(
        finale_df_, title="Data with proposed changes more than 2"
    )
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def display_one_vs_one(df_nequa, data, product_id, class_id):
    """Handle products present in exactly two countries.

    Keeps the classification with the higher Weight and attaches a
    text-similarity score so the conflict can be reviewed manually.
    """
    # One-vs-one conflicts: product appears in exactly two countries.
    df_nequa_1 = df_nequa[df_nequa.total_by_product.apply(lambda x: int(x) == 2)]
    max_poids_index1 = df_nequa_1.groupby(product_id)["Weight"].idxmax()
    # `.drop()` returns a new frame instead of mutating the `.loc` slice
    # in place (avoids pandas' SettingWithCopyWarning).
    df_max_poids1 = df_nequa_1.loc[max_poids_index1].drop(columns=["COUNTRY_KEY"])

    finale_df_1 = add_text_similarity(
        merge_and_update_classification(data, df_max_poids1, product_id, class_id)
    )
    display_data_with_download_button(
        finale_df_1, title=" One vs One with similarity score"
    )
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
+
def app():
    """Entry point for the 'Gestion des ruptures sous famille' page.

    Loads the uploaded file, shows it with interactive filters, collects
    the processing parameters, and runs the pipeline on RUN.
    """
    logging.info("Starting app function")
    st.title("Gestion des ruptures sous famille")
    input_box = InputsBox()
    data = input_box.get_data()
    # Lazy %-style args: formatted only if the DEBUG level is enabled.
    logging.debug("Data retrieved: %s", data is not None)

    if data is not None and data.shape[0] != 0:
        logging.info("Data is valid, proceeding with processing")
        st.header("Data")
        st.dataframe(filter_dataframe(data))

        st.header("Parameters")
        (
            product_id,
            class_id,
            min_product_id,
            valid_class_id,
            nb_countries,
            proportion,
            countries,
            show_proportion,
        ) = display_filters(input_box)

        # Optional extra restriction coming from the holding-level filter
        # widget; an empty selection means "keep everything".
        list_product_selected = (
            filter_dataframe(data, "data_filter_by_holding")[product_id]
            .unique()
            .tolist()
        )
        if list_product_selected:  # truthy iff at least one product picked
            data_selected = data[data[product_id].isin(list_product_selected)]
        else:
            st.warning("No additional filter selected")
            data_selected = data.copy()

        if st.button("RUN", key="run_button"):
            try:
                process_data(
                    data_selected,
                    product_id,
                    class_id,
                    min_product_id,
                    valid_class_id,
                    nb_countries,
                    proportion,
                    countries,
                    show_proportion,
                )
                st.success("Done!", icon="✅")
                st.balloons()
            except Exception as e:
                # Surface pipeline failures to the user instead of crashing.
                st.error(f"An error occurred: {str(e)}", icon="🚨")
    else:
        logging.warning("Data is None or empty")
        st.info(
            """Ensure that column names are capitalized and that product_id
            and class_id descriptions are present, as well as a country
            column.""",
            icon="ℹ️",
        )
    logging.info("App function completed")
|
| 249 |
|
| 250 |
|
| 251 |
if __name__ == "__main__":
    # Configure the page first (must precede any other st.* call), expose
    # the reference links in the sidebar, then run the app.
    config_page()
    sidebar_links = [
        (
            "Documentation utilisateur",
            "https://docs.google.com/document/d/1WQwr5D87ZHSlBRWQw7KMbBhbEdFS4dlhltFDgZBNP4U/edit?usp=sharing",
        ),
        (
            "Example of input",
            "https://docs.google.com/spreadsheets/d/123hVTOFpBT-C6mCnrOBh8fFIhSi8FxiuyHZJAQu8bDc/edit#gid=1220891905",
        ),
    ]
    for label, url in sidebar_links:
        st.sidebar.markdown(
            f'<a href="{url}">{label}</a>',
            unsafe_allow_html=True,
        )
    app()
|