"""Module-level lookup data: an abbreviation dictionary and a stop-word list.

Importing this module has side effects:

* ``load_dotenv()`` is called, then ``PROJECT_ID`` is read from the
  environment into ``project_id``.
* ``dictionnaire.tsv`` (tab-separated, UTF-8) is read from the current
  working directory; the import fails if the file is missing or lacks the
  ``ABBREVIATIONS`` / ``CORRESPONDANCES`` columns.

Public names: ``project_id``, ``data``, ``dictionnaire``, ``liste_stopword``.
"""
import os

import pandas as pd
from dotenv import load_dotenv

# Load variables from a .env file (if present) before reading the settings.
load_dotenv()

# None when PROJECT_ID is not set. In the visible code it is only referenced
# by the disabled brand query further down.
project_id = os.getenv('PROJECT_ID')

# Raw abbreviation table. The relative path is resolved against the current
# working directory, not against this file's location.
data = pd.read_csv("dictionnaire.tsv", sep="\t", encoding='utf-8')

# Mapping: ABBREVIATIONS column value -> CORRESPONDANCES column value.
dictionnaire = data.set_index('ABBREVIATIONS')['CORRESPONDANCES'].to_dict()

# ---------------------------------------------------------------------------
# Disabled legacy code, kept verbatim for reference.
#
# Inline version of the mapping; the live `dictionnaire` above is now built
# from the TSV file instead.
# NOTE(review): this literal repeats some keys with different values (for
# example "choc" -> "choco" and later 'choc' -> "chocolat"); in a dict
# literal the last occurrence wins. Check the TSV if this is ever restored.
# ---------------------------------------------------------------------------
#dictionnaire = {"rg": "rouge","rges" : "rouge","rge": "rouge", "rse": "rose" ,"rs" : "rose", "bl": "blanc", "bdx": "Bordeaux",
# "vdt": "vin de table", 'vdp': "vin de pays","blc": "blanc", "bib": "bag in box", "citr": "citron", "co": "coco", "gourm" : "gourmand",
# "patis": "patisserie", "p'tits" : "petit", "p'tit": "petit","p tit": "petit", "pt": "pepite", "rev": "revil","succ": "sucettes",
# "succet": "sucettes", "chocohouse": "choco house", "sach": "sachet", "choc": "choco", "tab" : "tablette", "hte" : "haute",
# "spagh" : "spaghetti", "scht": "sachet", "nr": "noir", "caf": "cafe","barr": "barre", "pces": "pieces","pc": "pieces", "acidu": "acidule","blnc": "blanc",
# "frui" : "fruit", "gourman" : "gourmand","bte" : "boîte", "bt" : "boîte", "ptit": "petit", "corb": "corbeil","ptits": "petit", "pti": "petit", "nois": "noisette",
# "poul": "poulain", "barq" : "barquette", "barqu" : "barquette", 'fizz': 'fizzy', "st": "saint", "mich": "michel", "cal" : "calendrier", "calend" : "calendrier",
# "calendr" : "calendrier", "caram" : "caramel", "cava" : "cavalier", "har" : "haribo", 'choc' : "chocolat", "choco" :"chocolat", 'lt' : "lait", "choc'n" :"chocolat noir",
# "choc n" :"chocolat noir", "degust" : "degustation", "degus" : "degustation", "bis" : "biscuit", "coffr" : "coffret", "coff" : "coffret", "conf" : "confiserie",
# "confis" : "confiserie", "croco" : "crocodile", "dble" : "double", "dess" : "dessert", "doyp" : "doypack", "harib" : "harib" , "et" : "etui", "exc" : "excellence",
# "excel" : "excellence", "frit" : "friture","fritu" : "friture","fritur" : "friture", "gd" : "grand", "gr" : "grand", "grd" : "grand", "grchoc" : "grand chocolat", "lat" : "lait", 'ass' : "assorti", "assoti" :"assorti",
# "noug" : "nougatine", "nougat" : "nougatine", "scht" : "sachet", "sct" : "secret", "cho" : "chocolat" , "bisc" : "biscuit", "am" : "amande", "liq" : "liqueur", "tabl" : "tablette","asst":"assorti",
# "tab" : "tablette", "bil" : "bille", "vali" : "valisette", "cda" : "chevaliers d argouges", "tub": "tubo", "gril" :"grille", "amandesgrilles" : "amandes grilles", "ball" : "ballotin",
# "piecestubo" : "pieces tubo"
# }

# ---------------------------------------------------------------------------
# Disabled legacy code, kept verbatim for reference.
#
# Fetched lower-cased brand names with pd.read_gbq and prepended them to the
# stop-word list.
# NOTE(review): re-enabling this requires `import numpy as np`; no such
# import is visible in this file.
# ---------------------------------------------------------------------------
#Brand = pd.read_gbq("""
# SELECT DISTINCT
# BEM_BRAND_DESC AS brand
# FROM `c4-gdw-prd.products_referential.d_bem_product_barcode` Pdcts
# WHERE Pdcts.BEM_BRAND_DESC NOT IN ("UNKNOWN")
# AND Pdcts.BEM_BRAND_KEY<>'-1'
# AND Pdcts.BEM_SECTOR_KEY="1"
# AND Pdcts.COUNTRY_KEY IN ('FRA','BEL')
#
# UNION DISTINCT
#
# SELECT DISTINCT
# BRAND_DESC AS brand
# FROM `c4-gdw-prd.products_referential.d_bem_product_barcode` Pdcts
# WHERE Pdcts.BRAND_DESC NOT IN ("UNKNOWN")
# AND Pdcts.BRAND_KEY<>'-1'
# AND Pdcts.BEM_SECTOR_KEY="1"
# AND Pdcts.COUNTRY_KEY IN ('FRA','BEL')""",
# project_id=project_id).brand.apply(lambda x: x.lower())
#liste_stopword = np.append(Brand.values.tolist(), ['oz', 'kg', 'g', 'lb', 'mg', 'l', 'cl', 'ml', 'tsp', 'tbsp', 'cm', 'x', 'cte', 'h', 'nux'])

# Static stop-word list currently in use: the "unknown" placeholder followed
# by short unit-like tokens. Order is preserved from the original definition.
liste_stopword = [
    "unknown",
    'oz', 'kg', 'g', 'lb', 'mg', 'l', 'cl', 'ml',
    'tsp', 'tbsp', 'cm', 'x', 'cte', 'h',
]