File size: 3,048 Bytes
d6d506e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58cba83
d6d506e
 
58cba83
d6d506e
 
 
58cba83
d6d506e
 
 
 
 
 
 
 
 
 
58cba83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6d506e
 
 
58cba83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import pandas as pd
import requests
from io import BytesIO

# URL of the main data file (a CSV hosted on raw.githubusercontent.com).
MAIN_DATA_URL = "https://raw.githubusercontent.com/M00N69/RASFFPORTAL/main/unified_rasff_data_with_grouping.csv"

def load_data(url: str) -> pd.DataFrame:
    """Load the main CSV dataset and standardise its column names.

    The ``'Date of Case'`` column is parsed as dates while reading. Each
    column name is then stripped of surrounding whitespace, has its spaces
    replaced by underscores and is lower-cased, so ``'Date of Case'`` becomes
    ``'date_of_case'``.

    Args:
        url: Location of the CSV; anything ``pandas.read_csv`` accepts.

    Returns:
        The loaded DataFrame with standardised column names.

    Raises:
        RuntimeError: If the CSV cannot be read or parsed. The message starts
            with ``"Failed to load data:"`` and the underlying error is
            attached as ``__cause__``.
    """
    # Only the read itself is guarded, so the wrapped error always describes
    # a load failure and nothing else.
    try:
        df = pd.read_csv(url, parse_dates=['Date of Case'])
    except Exception as e:
        # Chain explicitly so the original traceback is kept as the cause.
        raise RuntimeError(f"Failed to load data: {e}") from e

    # str() keeps the normalisation safe even for non-string column labels.
    df.columns = [str(col).strip().replace(" ", "_").lower() for col in df.columns]
    return df

def download_and_clean_weekly_data(year: int, weeks: list, *, timeout: float = 30.0) -> pd.DataFrame:
    """Download the weekly Excel files for ``year`` and concatenate them.

    For every entry of ``weeks`` a URL is built from the template below using
    the year without its first two characters, the full year and the
    zero-padded week number. Each file that downloads and parses has its
    column names standardised (stripped, spaces replaced by underscores,
    lower-cased). A week that cannot be downloaded or parsed is reported with
    ``print`` and skipped, so one bad week never aborts the others.

    Args:
        year: Year of the requested files.
        weeks: Week numbers to fetch.
        timeout: Seconds to wait for each HTTP request before giving up on
            that week.

    Returns:
        All successfully parsed weeks concatenated with a fresh index, or an
        empty DataFrame when nothing could be loaded.
    """
    url_template = "https://www.sirene-diffusion.fr/regia/000-rasff/{}/rasff-{}-{}.xls"
    dfs = []

    for week in weeks:
        url = url_template.format(str(year)[2:], year, str(week).zfill(2))

        # A timeout prevents one unresponsive request from hanging the run,
        # and a network error is treated like any other failed week.
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as e:
            print(f"Data for week {week} could not be downloaded: {e}")
            continue

        if response.status_code != 200:
            print(f"Data for week {week} could not be downloaded.")
            continue

        # Deliberately broad: parsing is best-effort per week and the Excel
        # reader can fail in several unrelated ways.
        try:
            df = pd.read_excel(BytesIO(response.content))
            # str() guards against non-string headers coming out of Excel.
            df.columns = [str(col).strip().replace(" ", "_").lower() for col in df.columns]
            dfs.append(df)
        except Exception as e:
            print(f"Failed to process data for week {week}: {e}")

    return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()

# Lookup tables for product and hazard categories.
# Each lower-case product category maps to a (short label, group) pair.
product_category_mapping = {
    raw_label: (short_label, group_label)
    for raw_label, short_label, group_label in (
        ("fruits and vegetables", "Fruits & Veggies", "Fresh Produce"),
        ("meat and meat products", "Meat", "Animal Products"),
        ("fish and fish products", "Fish", "Seafood"),
        ("dairy products", "Dairy", "Animal Products"),
        ("cereals and bakery products", "Cereals", "Grains"),
        # Add further mappings here.
    )
}

# Each lower-case hazard category maps to a (short label, group) pair.
hazard_category_mapping = {
    raw_label: (short_label, group_label)
    for raw_label, short_label, group_label in (
        ("microbiological contamination", "Microbial", "Biological"),
        ("chemical contamination", "Chemical", "Toxic"),
        ("foreign bodies", "Physical", "Foreign Objects"),
        ("allergens", "Allergens", "Health Risk"),
        # Add further mappings here.
    )
}

def apply_mappings(df: pd.DataFrame) -> pd.DataFrame:
    """Add the short-label and group columns derived from the category columns.

    ``product_category`` fills ``prodcat`` and ``groupprod`` from
    ``product_category_mapping``; ``hazard_category`` fills ``hazcat`` and
    ``grouphaz`` from ``hazard_category_mapping``. Values are looked up by
    their lower-cased string form, and anything without an entry (including
    missing values) becomes ``"Unknown"`` in both columns. A source column
    that is absent is reported with ``print`` and skipped.

    Args:
        df: DataFrame to annotate. It is modified in place.

    Returns:
        The same DataFrame object, with the new columns added.
    """
    unknown = ("Unknown", "Unknown")
    plan = (
        ('product_category', product_category_mapping, 'prodcat', 'groupprod'),
        ('hazard_category', hazard_category_mapping, 'hazcat', 'grouphaz'),
    )

    for source, mapping, label_col, group_col in plan:
        if source not in df.columns:
            print(f"Column '{source}' not found in DataFrame.")
            continue

        # Plain list lookups avoid building one pandas Series per row and,
        # unlike Series.apply, also work when the frame has zero rows.
        pairs = [mapping.get(str(value).lower(), unknown) for value in df[source]]
        df[label_col] = [pair[0] for pair in pairs]
        df[group_col] = [pair[1] for pair in pairs]

    return df

# Example usage: load the main dataset, add the category columns and show
# the first rows.
if __name__ == "__main__":
    df_main = apply_mappings(load_data(MAIN_DATA_URL))
    print(df_main.head())