Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import requests | |
| from io import BytesIO | |
# URL of the main data file
MAIN_DATA_URL = (
    "https://raw.githubusercontent.com/M00N69/RASFFPORTAL/main/"
    "unified_rasff_data_with_grouping.csv"
)
# Load the main dataset
def load_data(url: str) -> pd.DataFrame:
    """Read the main CSV dataset and normalise its column names.

    The ``Date of Case`` column is parsed as dates while reading. Afterwards
    every column name is stripped, has its spaces replaced by underscores and
    is lower-cased (``"Date of Case"`` becomes ``"date_of_case"``).

    Args:
        url: Location of the CSV file; passed unchanged to ``pandas.read_csv``.

    Returns:
        The loaded DataFrame with normalised column names.

    Raises:
        RuntimeError: If the file cannot be read or parsed (including when
            the ``Date of Case`` column is missing). The underlying error is
            chained as ``__cause__`` so the original traceback is kept.
    """
    try:
        df = pd.read_csv(url, parse_dates=['Date of Case'])
    except Exception as e:
        # Top-level boundary: wrap any reader failure in one error type while
        # keeping the original exception attached for debugging.
        raise RuntimeError(f"Failed to load data: {e}") from e

    # Standardise column names; str() guards against non-string labels.
    df.columns = [str(col).strip().replace(" ", "_").lower() for col in df.columns]
    return df
# Download and clean the weekly data
def download_and_clean_weekly_data(
    year: int, weeks: list, *, timeout: float = 30.0
) -> pd.DataFrame:
    """Download the weekly Excel files for ``year`` and concatenate them.

    For each week a URL is built from the template below using the year with
    its first two characters removed, the full year, and the week number
    zero-padded to two digits. Each successfully downloaded file is read with
    ``pandas.read_excel`` and its column names are normalised (stripped,
    spaces replaced by underscores, lower-cased).

    The function is best-effort: a week that cannot be downloaded or parsed
    is reported with ``print`` and skipped; it never aborts the other weeks.

    Args:
        year: Year of the requested files.
        weeks: Week numbers to download.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        One DataFrame holding the rows of every week that was processed, with
        a fresh index; an empty DataFrame when no week could be processed.
    """
    url_template = "https://www.sirene-diffusion.fr/regia/000-rasff/{}/rasff-{}-{}.xls"
    dfs = []
    for week in weeks:
        url = url_template.format(str(year)[2:], year, str(week).zfill(2))

        # A timeout prevents one stalled connection from hanging the whole
        # run; network errors are treated like any other failed download.
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as e:
            print(f"Data for week {week} could not be downloaded: {e}")
            continue

        if response.status_code != 200:
            print(f"Data for week {week} could not be downloaded.")
            continue

        # Deliberately broad: any parsing problem only skips this week.
        try:
            df = pd.read_excel(BytesIO(response.content))
        except Exception as e:
            print(f"Failed to process data for week {week}: {e}")
            continue

        # Standardise column names; str() keeps numeric or date headers from
        # raising and discarding the whole week.
        df.columns = [str(col).strip().replace(" ", "_").lower() for col in df.columns]
        dfs.append(df)

    return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
# Mappings for product and hazard categories.
# Product lookup: lower-case product category label -> pair of labels that
# apply_mappings writes to the 'prodcat' and 'groupprod' columns.
product_category_mapping = {
    "fruits and vegetables": ("Fruits & Veggies", "Fresh Produce"),
    "meat and meat products": ("Meat", "Animal Products"),
    "fish and fish products": ("Fish", "Seafood"),
    "dairy products": ("Dairy", "Animal Products"),
    "cereals and bakery products": ("Cereals", "Grains"),
    # Add further mappings here
}
# Hazard lookup: lower-case hazard category label -> pair of labels that
# apply_mappings writes to the 'hazcat' and 'grouphaz' columns.
hazard_category_mapping = {
    "microbiological contamination": ("Microbial", "Biological"),
    "chemical contamination": ("Chemical", "Toxic"),
    "foreign bodies": ("Physical", "Foreign Objects"),
    "allergens": ("Allergens", "Health Risk"),
    # Add further mappings here
}
# Apply the category mappings
def _add_mapped_columns(df: pd.DataFrame, source: str, mapping: dict, targets: tuple) -> None:
    """Fill the two ``targets`` columns of ``df`` by looking up ``df[source]`` in ``mapping``."""
    unknown = ("Unknown", "Unknown")
    # Keys are compared as stripped, lower-cased text so stray whitespace or
    # capitalisation in the data does not turn a known label into "Unknown".
    pairs = [mapping.get(str(value).strip().lower(), unknown) for value in df[source]]
    first_col, second_col = targets
    # Plain lists are assigned positionally; this also works for a frame with
    # zero rows, where a per-row ``Series.apply`` would not yield two columns.
    df[first_col] = [pair[0] for pair in pairs]
    df[second_col] = [pair[1] for pair in pairs]


def apply_mappings(df: pd.DataFrame) -> pd.DataFrame:
    """Add grouped product and hazard category columns to ``df``.

    When ``df`` has a ``product_category`` column, the columns ``prodcat`` and
    ``groupprod`` are filled from ``product_category_mapping``. When it has a
    ``hazard_category`` column, ``hazcat`` and ``grouphaz`` are filled from
    ``hazard_category_mapping``. Values without a mapping entry get
    ``"Unknown"`` in both columns. A missing source column is reported with
    ``print`` and skipped.

    Args:
        df: DataFrame to annotate. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    if 'product_category' in df.columns:
        _add_mapped_columns(
            df, 'product_category', product_category_mapping, ('prodcat', 'groupprod')
        )
    else:
        print("Column 'product_category' not found in DataFrame.")

    if 'hazard_category' in df.columns:
        _add_mapped_columns(
            df, 'hazard_category', hazard_category_mapping, ('hazcat', 'grouphaz')
        )
    else:
        print("Column 'hazard_category' not found in DataFrame.")

    return df
# Example usage: load the main dataset, add the grouped category columns and
# print the first rows.
if __name__ == "__main__":
    df_main = apply_mappings(load_data(MAIN_DATA_URL))
    print(df_main.head())