"""Load RASFF data from CSV/XLS sources and map categories to coarser groups."""

from io import BytesIO

import pandas as pd

try:
    import requests
except ImportError:  # pragma: no cover - only hit when requests is not installed
    # requests is needed solely by download_and_clean_weekly_data; keeping the
    # import optional lets the pandas-only helpers be used without it.
    requests = None

# URL of the main dataset
MAIN_DATA_URL = (
    "https://raw.githubusercontent.com/M00N69/RASFFPORTAL/main/"
    "unified_rasff_data_with_grouping.csv"
)

# Seconds to wait on each weekly download before giving up, so a stalled
# server cannot hang the caller indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# Fallback (category, group) pair for values absent from a mapping
_UNKNOWN = ("Unknown", "Unknown")


def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize column names in place: trimmed, spaces to underscores, lower-case.

    Headers are passed through ``str`` first so non-string labels (e.g. numeric
    spreadsheet headers) do not raise.
    """
    df.columns = [str(col).strip().replace(" ", "_").lower() for col in df.columns]
    return df


# Load the main data
def load_data(url: str) -> pd.DataFrame:
    """Read the CSV at *url* and return it with standardized column names.

    The 'Date of Case' column is parsed as dates while reading.

    Raises:
        Exception: if reading or normalizing fails; the underlying error is
            attached as ``__cause__``.
    """
    try:
        df = pd.read_csv(url, parse_dates=['Date of Case'])
        return _standardize_columns(df)
    except Exception as e:
        raise Exception(f"Failed to load data: {e}") from e


# Download and clean the weekly data
def download_and_clean_weekly_data(year: int, weeks: list) -> pd.DataFrame:
    """Download the weekly XLS files for *year* and concatenate them.

    This is best-effort: a week that cannot be downloaded or parsed is
    reported with ``print`` and skipped.

    Args:
        year: Four-digit year; its last two digits and full value both appear
            in the URL.
        weeks: Week numbers to fetch; each is zero-padded to two digits.

    Returns:
        One DataFrame with the rows of every week that loaded successfully
        (column names standardized), or an empty DataFrame if none did.

    Raises:
        ImportError: if the ``requests`` package is not installed.
    """
    if requests is None:
        raise ImportError("The 'requests' package is required to download weekly data.")

    url_template = "https://www.sirene-diffusion.fr/regia/000-rasff/{}/rasff-{}-{}.xls"
    dfs = []
    for week in weeks:
        url = url_template.format(str(year)[2:], year, str(week).zfill(2))
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as e:
            # Treat network failures like a non-200 reply: skip this week only.
            print(f"Data for week {week} could not be downloaded: {e}")
            continue

        if response.status_code != 200:
            print(f"Data for week {week} could not be downloaded.")
            continue

        try:
            df = pd.read_excel(BytesIO(response.content))
            dfs.append(_standardize_columns(df))
        except Exception as e:
            # Deliberately broad: any parsing problem skips the week.
            print(f"Failed to process data for week {week}: {e}")

    return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()


# Mappings for product and hazard categories
product_category_mapping = {
    "fruits and vegetables": ("Fruits & Veggies", "Fresh Produce"),
    "meat and meat products": ("Meat", "Animal Products"),
    "fish and fish products": ("Fish", "Seafood"),
    "dairy products": ("Dairy", "Animal Products"),
    "cereals and bakery products": ("Cereals", "Grains"),
    # Add further mappings here
}

hazard_category_mapping = {
    "microbiological contamination": ("Microbial", "Biological"),
    "chemical contamination": ("Chemical", "Toxic"),
    "foreign bodies": ("Physical", "Foreign Objects"),
    "allergens": ("Allergens", "Health Risk"),
    # Add further mappings here
}


def _map_categories(df: pd.DataFrame, source_col: str, target_cols: tuple, mapping: dict) -> None:
    """Fill the two *target_cols* of *df* by looking up *source_col* in *mapping*.

    Lookup keys are the lower-cased string form of each value; anything not in
    *mapping* yields ("Unknown", "Unknown"). Plain lists are assigned so that a
    zero-row frame works and no per-row Series is built.
    """
    pairs = [mapping.get(str(value).lower(), _UNKNOWN) for value in df[source_col]]
    first_col, second_col = target_cols
    df[first_col] = [pair[0] for pair in pairs]
    df[second_col] = [pair[1] for pair in pairs]


# Apply the category mappings
def apply_mappings(df: pd.DataFrame) -> pd.DataFrame:
    """Add grouped category columns to *df* in place and return it.

    If 'product_category' is present, 'prodcat' and 'groupprod' are added from
    ``product_category_mapping``; if 'hazard_category' is present, 'hazcat' and
    'grouphaz' are added from ``hazard_category_mapping``. A missing source
    column is reported with ``print`` and left unmapped.
    """
    if 'product_category' in df.columns:
        _map_categories(
            df, 'product_category', ('prodcat', 'groupprod'), product_category_mapping
        )
    else:
        print("Column 'product_category' not found in DataFrame.")

    if 'hazard_category' in df.columns:
        _map_categories(
            df, 'hazard_category', ('hazcat', 'grouphaz'), hazard_category_mapping
        )
    else:
        print("Column 'hazard_category' not found in DataFrame.")

    return df


# Usage example
if __name__ == "__main__":
    df_main = load_data(MAIN_DATA_URL)
    df_main = apply_mappings(df_main)
    print(df_main.head())